diff options
Diffstat (limited to 'REORG.TODO/manual')
109 files changed, 88428 insertions, 0 deletions
diff --git a/REORG.TODO/manual/Makefile b/REORG.TODO/manual/Makefile new file mode 100644 index 0000000000..510f160d3b --- /dev/null +++ b/REORG.TODO/manual/Makefile @@ -0,0 +1,196 @@ +# Copyright (C) 1992-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# Makefile for the GNU C Library manual. 
+ +subdir := manual + +include ../Makeconfig + +.PHONY: dvi pdf info html + +dvi: $(objpfx)libc.dvi +pdf: $(objpfx)libc.pdf + +TEXI2DVI = texi2dvi +TEXI2PDF = texi2dvi --pdf + +ifneq ($(strip $(MAKEINFO)),:) +info: $(objpfx)libc.info +endif + +chapters = $(addsuffix .texi, \ + intro errno memory ctype string charset locale \ + message search pattern io stdio llio filesys \ + pipe socket terminal syslog math arith time \ + resource setjmp signal startup process ipc job \ + nss users sysinfo conf crypt debug threads \ + probes tunables) +add-chapters = $(wildcard $(foreach d, $(add-ons), ../$d/$d.texi)) +appendices = lang.texi header.texi install.texi maint.texi platform.texi \ + contrib.texi +licenses = freemanuals.texi lgpl-2.1.texi fdl-1.3.texi + +-include $(objpfx)texis +$(objpfx)texis: texis.awk $(chapters) $(add-chapters) $(appendices) $(licenses) + $(make-target-directory) + $(AWK) -f $^ > $@.T + mv -f $@.T $@ + +nonexamples = $(filter-out $(add-chapters) %.c.texi, $(texis)) +examples = $(filter-out $(foreach d, $(add-ons), ../$d/%.c.texi), \ + $(filter %.c.texi, $(texis))) + +# Generated files directly included from libc.texinfo. +libc-texi-generated = chapters.texi top-menu.texi dir-add.texi \ + libm-err.texi version.texi pkgvers.texi + +# Add path to build dir for generated files +texis-path := $(filter-out $(libc-texi-generated) summary.texi $(examples), \ + $(texis)) \ + $(addprefix $(objpfx),$(filter $(libc-texi-generated) summary.texi \ + $(examples), $(texis))) + +# Kludge: implicit rule so Make knows the one command does it all. 
+chapters.% top-menu.%: libc-texinfo.sh $(texis-path) Makefile + AWK=$(AWK) $(SHELL) $< $(objpfx) \ + '$(chapters)' \ + '$(add-chapters)' \ + '$(appendices) $(licenses)' + + +$(objpfx)libc.dvi $(objpfx)libc.pdf $(objpfx)libc.info: \ + $(addprefix $(objpfx),$(libc-texi-generated)) +$(objpfx)libc.dvi $(objpfx)libc.pdf: texinfo.tex + +html: $(objpfx)libc/index.html +$(objpfx)libc/index.html: $(addprefix $(objpfx),$(libc-texi-generated)) + $(MAKEINFO) -P $(objpfx) -o $(objpfx)libc --html libc.texinfo + +# Generate the summary from the Texinfo source files for each chapter. +$(objpfx)summary.texi: $(objpfx)stamp-summary ; +$(objpfx)stamp-summary: summary.awk $(filter-out $(objpfx)summary.texi, \ + $(texis-path)) + $(SHELL) ./check-safety.sh $(filter-out $(objpfx)%, $(texis-path)) + $(AWK) -f $^ | sort -t'' -df -k 1,1 | tr '\014' '\012' \ + > $(objpfx)summary-tmp + $(move-if-change) $(objpfx)summary-tmp $(objpfx)summary.texi + touch $@ + +# Generate a file which can be added to the `dir' content to provide direct +# access to the documentation of the function, variables, and other +# definitions. +$(objpfx)dir-add.texi: xtract-typefun.awk $(texis-path) + (echo "@dircategory GNU C library functions and macros"; \ + echo "@direntry"; \ + $(AWK) -f $^ | sort; \ + echo "@end direntry") > $@.new + mv -f $@.new $@ + +# The table with the math errors is generated. +$(objpfx)libm-err.texi: $(objpfx)stamp-libm-err +$(objpfx)stamp-libm-err: libm-err-tab.pl $(wildcard $(foreach dir,$(sysdirs),\ + $(dir)/libm-test-ulps)) + pwd=`pwd`; \ + $(PERL) $< $$pwd/.. > $(objpfx)libm-err-tmp + $(move-if-change) $(objpfx)libm-err-tmp $(objpfx)libm-err.texi + touch $@ + +# Package version and bug reporting URL. 
+$(objpfx)pkgvers.texi: $(objpfx)stamp-pkgvers ; +$(objpfx)stamp-pkgvers: $(common-objpfx)config.make + echo "@ifclear PKGVERS" > $(objpfx)pkgvers-tmp + echo "@set PKGVERS" >> $(objpfx)pkgvers-tmp + echo "@set PKGVERSION $(PKGVERSION_TEXI)" >> $(objpfx)pkgvers-tmp + if [ "$(PKGVERSION_TEXI)" = "(GNU libc) " ]; then \ + echo "@set PKGVERSION_DEFAULT" >> $(objpfx)pkgvers-tmp; \ + fi + echo "@set REPORT_BUGS_TO $(REPORT_BUGS_TEXI)" >> $(objpfx)pkgvers-tmp + echo "@end ifclear" >> $(objpfx)pkgvers-tmp + $(move-if-change) $(objpfx)pkgvers-tmp $(objpfx)pkgvers.texi + touch $@ + +# Generate a file with the version number. +$(objpfx)version.texi: $(objpfx)stamp-version ; +$(objpfx)stamp-version: $(common-objpfx)config.make + echo "@set VERSION $(version)" > $(objpfx)version-tmp + $(move-if-change) $(objpfx)version-tmp $(objpfx)version.texi + touch $@ + +# Generate Texinfo files from the C source for the example programs. +$(objpfx)%.c.texi: examples/%.c + sed -e '1,/^\*\/$$/d' \ + -e 's,[{}],@&,g' \ + -e 's,/\*\(@.*\)\*/,\1,g' \ + -e 's,/\* *,/* @r{,g' -e 's, *\*/,} */,' \ + -e 's/\(@[a-z][a-z]*\)@{\([^}]*\)@}/\1{\2}/g'\ + $< | expand > $@.new + mv -f $@.new $@ + +$(objpfx)%.info: %.texinfo + LANGUAGE=C LC_ALL=C $(MAKEINFO) -P $(objpfx) --output=$@ $< + +$(objpfx)%.dvi: %.texinfo + cd $(objpfx);$(TEXI2DVI) -I $(shell cd $(<D) && pwd) --output=$@ \ + $(shell cd $(<D) && pwd)/$< + +$(objpfx)%.pdf: %.texinfo + cd $(objpfx);$(TEXI2PDF) -I $(shell cd $(<D) && pwd) --output=$@ \ + $(shell cd $(<D) && pwd)/$< + + +# Distribution. 
+minimal-dist = summary.awk texis.awk tsort.awk libc-texinfo.sh libc.texinfo \ + libm-err.texi stamp-libm-err check-safety.sh \ + $(filter-out summary.texi, $(nonexamples)) \ + $(patsubst %.c.texi,examples/%.c, $(examples)) + +indices = cp fn pg tp vr ky +generated-dirs += libc +generated += libc.dvi libc.pdf libc.tmp libc.info* \ + stubs \ + texis summary.texi stamp-summary *.c.texi \ + $(foreach index,$(indices),libc.$(index) libc.$(index)s) \ + libc.log libc.aux libc.toc \ + $(libc-texi-generated) \ + stamp-libm-err stamp-version + +include ../Rules + +.PHONY: install subdir_install install-data +install-data subdir_install: install +# libm-err.texi generation requires perl. +ifneq ($(PERL),no) +ifneq ($(strip $(MAKEINFO)),:) +install: $(inst_infodir)/libc.info + @if $(SHELL) -c '$(INSTALL_INFO) --version' >/dev/null 2>&1; then \ + test -f $(inst_infodir)/dir || $(INSTALL_DATA) dir $(inst_infodir);\ + $(INSTALL_INFO) --info-dir=$(inst_infodir) $(inst_infodir)/libc.info;\ + else : ; fi +endif +endif +# Catchall implicit rule for other installation targets from the parent. +install-%: ; + +$(inst_infodir)/libc.info: $(objpfx)libc.info + $(make-target-directory) + for file in $<*; do \ + $(INSTALL_DATA) $$file $(@D)/; \ + done + +TAGS: $(minimal-dist) + $(ETAGS) -o $@ $^ diff --git a/REORG.TODO/manual/argp.texi b/REORG.TODO/manual/argp.texi new file mode 100644 index 0000000000..bca3ca5ed9 --- /dev/null +++ b/REORG.TODO/manual/argp.texi @@ -0,0 +1,1349 @@ +@node Argp, Suboptions, Getopt, Parsing Program Arguments +@need 5000 +@section Parsing Program Options with Argp +@cindex argp (program argument parser) +@cindex argument parsing with argp +@cindex option parsing with argp + +@dfn{Argp} is an interface for parsing unix-style argument vectors. +@xref{Program Arguments}. + +Argp provides features unavailable in the more commonly used +@code{getopt} interface. 
These features include automatically producing +output in response to the @samp{--help} and @samp{--version} options, as +described in the GNU coding standards. Using argp makes it less likely +that programmers will neglect to implement these additional options or +keep them up to date. + +Argp also provides the ability to merge several independently defined +option parsers into one, mediating conflicts between them and making the +result appear seamless. A library can export an argp option parser that +user programs might employ in conjunction with their own option parsers, +resulting in less work for the user programs. Some programs may use only +argument parsers exported by libraries, thereby achieving consistent and +efficient option-parsing for abstractions implemented by the libraries. + +@pindex argp.h +The header file @file{<argp.h>} should be included to use argp. + +@subsection The @code{argp_parse} Function + +The main interface to argp is the @code{argp_parse} function. In many +cases, calling @code{argp_parse} is the only argument-parsing code +needed in @code{main}. +@xref{Program Arguments}. + +@comment argp.h +@comment GNU +@deftypefun {error_t} argp_parse (const struct argp *@var{argp}, int @var{argc}, char **@var{argv}, unsigned @var{flags}, int *@var{arg_index}, void *@var{input}) +@safety{@prelim{}@mtunsafe{@mtasurace{:argpbuf} @mtslocale{} @mtsenv{}}@asunsafe{@ascuheap{} @ascuintl{} @asulock{} @asucorrupt{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c Optionally alloca()tes standard help options, initializes the parser, +@c then parses individual args in a loop, and then finalizes. 
+@c parser_init +@c calc_sizes ok +@c option_is_end ok +@c malloc @ascuheap @acsmem +@c parser_convert @mtslocale +@c convert_options @mtslocale +@c option_is_end ok +@c option_is_short ok +@c isprint, but locale may change within the loop +@c find_long_option ok +@c group_parse +@c group->parser (from argp->parser) +@c parser_parse_next +@c getopt_long(_only)_r many issues, same as non_r minus @mtasurace +@c parser_parse_arg +@c group_parse dup +@c parser_parse_opt +@c group_parse dup +@c argp_error dup @mtasurace:argpbuf @mtsenv @mtslocale @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c dgettext (bad key error) dup @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c parser_finalize +@c group_parse +@c fprintf dup @mtslocale @asucorrupt @aculock @acucorrupt [no @ascuheap @acsmem] +@c dgettext dup @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c arg_state_help +@c free dup @ascuheap @acsmem +The @code{argp_parse} function parses the arguments in @var{argv}, of +length @var{argc}, using the argp parser @var{argp}. @xref{Argp +Parsers}. Passing a null pointer for @var{argp} is the same as using +a @code{struct argp} containing all zeros. + +@var{flags} is a set of flag bits that modify the parsing behavior. +@xref{Argp Flags}. @var{input} is passed through to the argp parser +@var{argp}, and has meaning defined by @var{argp}. A typical usage is +to pass a pointer to a structure which is used for specifying +parameters to the parser and passing back the results. + +Unless the @code{ARGP_NO_EXIT} or @code{ARGP_NO_HELP} flags are included +in @var{flags}, calling @code{argp_parse} may result in the program +exiting. This behavior is true if an error is detected, or when an +unknown option is encountered. @xref{Program Termination}. + +If @var{arg_index} is non-null, the index of the first unparsed option +in @var{argv} is returned as a value.
+ +The return value is zero for successful parsing, or an error code +(@pxref{Error Codes}) if an error is detected. Different argp parsers +may return arbitrary error codes, but the standard error codes are: +@code{ENOMEM} if a memory allocation error occurred, or @code{EINVAL} if +an unknown option or option argument is encountered. +@end deftypefun + +@menu +* Globals: Argp Global Variables. Global argp parameters. +* Parsers: Argp Parsers. Defining parsers for use with @code{argp_parse}. +* Flags: Argp Flags. Flags that modify the behavior of @code{argp_parse}. +* Help: Argp Help. Printing help messages when not parsing. +* Examples: Argp Examples. Simple examples of programs using argp. +* Customization: Argp User Customization. + Users may control the @samp{--help} output format. +@end menu + +@node Argp Global Variables, Argp Parsers, , Argp +@subsection Argp Global Variables + +These variables make it easy for user programs to implement the +@samp{--version} option and provide a bug-reporting address in the +@samp{--help} output. These are implemented in argp by default. + +@comment argp.h +@comment GNU +@deftypevar {const char *} argp_program_version +If defined or set by the user program to a non-zero value, then a +@samp{--version} option is added when parsing with @code{argp_parse}, +which will print the @samp{--version} string followed by a newline and +exit. The exception to this is if the @code{ARGP_NO_EXIT} flag is used. +@end deftypevar + +@comment argp.h +@comment GNU +@deftypevar {const char *} argp_program_bug_address +If defined or set by the user program to a non-zero value, +@code{argp_program_bug_address} should point to a string that will be +printed at the end of the standard output for the @samp{--help} option, +embedded in a sentence that says @samp{Report bugs to @var{address}.}. 
+@end deftypevar + +@need 1500 +@comment argp.h +@comment GNU +@defvar argp_program_version_hook +If defined or set by the user program to a non-zero value, a +@samp{--version} option is added when parsing with @code{argp_parse}, +which prints the program version and exits with a status of zero. This +is not the case if the @code{ARGP_NO_HELP} flag is used. If the +@code{ARGP_NO_EXIT} flag is set, the exit behavior of the program is +suppressed or modified, as when the argp parser is going to be used by +other programs. + +It should point to a function with this type of signature: + +@smallexample +void @var{print-version} (FILE *@var{stream}, struct argp_state *@var{state}) +@end smallexample + +@noindent +@xref{Argp Parsing State}, for an explanation of @var{state}. + +This variable takes precedence over @code{argp_program_version}, and is +useful if a program has version information not easily expressed in a +simple string. +@end defvar + +@comment argp.h +@comment GNU +@deftypevar error_t argp_err_exit_status +This is the exit status used when argp exits due to a parsing error. If +not defined or set by the user program, this defaults to: +@code{EX_USAGE} from @file{<sysexits.h>}. +@end deftypevar + +@node Argp Parsers, Argp Flags, Argp Global Variables, Argp +@subsection Specifying Argp Parsers + +The first argument to the @code{argp_parse} function is a pointer to a +@code{struct argp}, which is known as an @dfn{argp parser}: + +@comment argp.h +@comment GNU +@deftp {Data Type} {struct argp} +This structure specifies how to parse a given set of options and +arguments, perhaps in conjunction with other argp parsers. It has the +following fields: + +@table @code +@item const struct argp_option *options +A pointer to a vector of @code{argp_option} structures specifying which +options this argp parser understands; it may be zero if there are no +options at all. @xref{Argp Option Vectors}.
+ +@item argp_parser_t parser +A pointer to a function that defines actions for this parser; it is +called for each option parsed, and at other well-defined points in the +parsing process. A value of zero is the same as a pointer to a function +that always returns @code{ARGP_ERR_UNKNOWN}. @xref{Argp Parser +Functions}. + +@item const char *args_doc +If non-zero, a string describing what non-option arguments are called by +this parser. This is only used to print the @samp{Usage:} message. If +it contains newlines, the strings separated by them are considered +alternative usage patterns and printed on separate lines. Lines after +the first are prefixed by @samp{ or: } instead of @samp{Usage:}. + +@item const char *doc +If non-zero, a string containing extra text to be printed before and +after the options in a long help message, with the two sections +separated by a vertical tab (@code{'\v'}, @code{'\013'}) character. By +convention, the documentation before the options is just a short string +explaining what the program does. Documentation printed after the +options describes behavior in more detail. + +@item const struct argp_child *children +A pointer to a vector of @code{argp_child} structures. This pointer +specifies which additional argp parsers should be combined with this +one. @xref{Argp Children}. + +@item char *(*help_filter)(int @var{key}, const char *@var{text}, void *@var{input}) +If non-zero, a pointer to a function that filters the output of help +messages. @xref{Argp Help Filtering}. + +@item const char *argp_domain +If non-zero, the strings used in the argp library are translated using +the domain described by this string. If zero, the current default domain +is used. + +@end table +@end deftp + +Of the above group, @code{options}, @code{parser}, @code{args_doc}, and +the @code{doc} fields are usually all that are needed. If an argp +parser is defined as an initialized C variable, only the fields used +need be specified in the initializer.
The rest will default to zero due +to the way C structure initialization works. This design is exploited in +most argp structures; the most-used fields are grouped near the +beginning, the unused fields left unspecified. + +@menu +* Options: Argp Option Vectors. Specifying options in an argp parser. +* Argp Parser Functions:: Defining actions for an argp parser. +* Children: Argp Children. Combining multiple argp parsers. +* Help Filtering: Argp Help Filtering. Customizing help output for an argp parser. +@end menu + +@node Argp Option Vectors, Argp Parser Functions, Argp Parsers, Argp Parsers +@subsection Specifying Options in an Argp Parser + +The @code{options} field in a @code{struct argp} points to a vector of +@code{struct argp_option} structures, each of which specifies an option +that the argp parser supports. Multiple entries may be used for a single +option provided it has multiple names. This should be terminated by an +entry with zero in all fields. Note that when using an initialized C +array for options, writing @code{@{ 0 @}} is enough to achieve this. + +@comment argp.h +@comment GNU +@deftp {Data Type} {struct argp_option} +This structure specifies a single option that an argp parser +understands, as well as how to parse and document that option. It has +the following fields: + +@table @code +@item const char *name +The long name for this option, corresponding to the long option +@samp{--@var{name}}; this field may be zero if this option @emph{only} +has a short name. To specify multiple names for an option, additional +entries may follow this one, with the @code{OPTION_ALIAS} flag +set. @xref{Argp Option Flags}. + +@item int key +The integer key provided by the current option to the option parser. If +@var{key} has a value that is a printable @sc{ascii} character (i.e., +@code{isascii (@var{key})} is true), it @emph{also} specifies a short +option @samp{-@var{char}}, where @var{char} is the @sc{ascii} character +with the code @var{key}. 
+ +@item const char *arg +If non-zero, this is the name of an argument associated with this +option, which must be provided (e.g., with the +@samp{--@var{name}=@var{value}} or @samp{-@var{char} @var{value}} +syntaxes), unless the @code{OPTION_ARG_OPTIONAL} flag (@pxref{Argp +Option Flags}) is set, in which case it @emph{may} be provided. + +@item int flags +Flags associated with this option, some of which are referred to above. +@xref{Argp Option Flags}. + +@item const char *doc +A documentation string for this option, for printing in help messages. + +If both the @code{name} and @code{key} fields are zero, this string +will be printed tabbed left from the normal option column, making it +useful as a group header. This will be the first thing printed in its +group. In this usage, it's conventional to end the string with a +@samp{:} character. + +@item int group +Group identity for this option. + +In a long help message, options are sorted alphabetically within each +group, and the groups presented in the order 0, 1, 2, @dots{}, @var{n}, +@minus{}@var{m}, @dots{}, @minus{}2, @minus{}1. + +Every entry in an options array with this field 0 will inherit the group +number of the previous entry, or zero if it's the first one. If it's a +group header with @code{name} and @code{key} fields both zero, the +previous entry + 1 is the default. Automagic options such as +@samp{--help} are put into group @minus{}1. + +Note that because of C structure initialization rules, this field often +need not be specified, because 0 is the correct value. +@end table +@end deftp + + +@menu +* Flags: Argp Option Flags. Flags for options. +@end menu + +@node Argp Option Flags, , , Argp Option Vectors +@subsubsection Flags for Argp Options + +The following flags may be or'd together in the @code{flags} field of a +@code{struct argp_option}. 
These flags control various aspects of how +that option is parsed or displayed in help messages: + + +@vtable @code +@comment argp.h +@comment GNU +@item OPTION_ARG_OPTIONAL +The argument associated with this option is optional. + +@comment argp.h +@comment GNU +@item OPTION_HIDDEN +This option isn't displayed in any help messages. + +@comment argp.h +@comment GNU +@item OPTION_ALIAS +This option is an alias for the closest previous non-alias option. This +means that it will be displayed in the same help entry, and will inherit +fields other than @code{name} and @code{key} from the option being +aliased. + + +@comment argp.h +@comment GNU +@item OPTION_DOC +This option isn't actually an option and should be ignored by the actual +option parser. It is an arbitrary section of documentation that should +be displayed in much the same manner as the options. This is known as a +@dfn{documentation option}. + +If this flag is set, then the option @code{name} field is displayed +unmodified (e.g., no @samp{--} prefix is added) at the left-margin where +a @emph{short} option would normally be displayed, and this +documentation string is left in its usual place. For purposes of +sorting, any leading whitespace and punctuation is ignored, unless the +first non-whitespace character is @samp{-}. This entry is displayed +after all options, after @code{OPTION_DOC} entries with a leading +@samp{-}, in the same group. + +@comment argp.h +@comment GNU +@item OPTION_NO_USAGE +This option shouldn't be included in `long' usage messages, but should +still be included in other help messages. This is intended for options +that are completely documented in an argp's @code{args_doc} +field. @xref{Argp Parsers}. Including this option in the generic usage +list would be redundant, and should be avoided. 
+ +For instance, if @code{args_doc} is @code{"FOO BAR\n-x BLAH"}, and the +@samp{-x} option's purpose is to distinguish these two cases, @samp{-x} +should probably be marked @code{OPTION_NO_USAGE}. +@end vtable + +@node Argp Parser Functions, Argp Children, Argp Option Vectors, Argp Parsers +@subsection Argp Parser Functions + +The function pointed to by the @code{parser} field in a @code{struct +argp} (@pxref{Argp Parsers}) defines what actions take place in response +to each option or argument parsed. It is also used as a hook, allowing a +parser to perform tasks at certain other points during parsing. + +@need 2000 +Argp parser functions have the following type signature: + +@cindex argp parser functions +@smallexample +error_t @var{parser} (int @var{key}, char *@var{arg}, struct argp_state *@var{state}) +@end smallexample + +@noindent +where the arguments are as follows: + +@table @var +@item key +For each option that is parsed, @var{parser} is called with a value of +@var{key} from that option's @code{key} field in the option +vector. @xref{Argp Option Vectors}. @var{parser} is also called at +other times with special reserved keys, such as @code{ARGP_KEY_ARG} for +non-option arguments. @xref{Argp Special Keys}. + +@item arg +If @var{key} is an option, @var{arg} is its given value. This defaults +to zero if no value is specified. Only options that have a non-zero +@code{arg} field can ever have a value. These must @emph{always} have a +value unless the @code{OPTION_ARG_OPTIONAL} flag is specified. If the +input being parsed specifies a value for an option that doesn't allow +one, an error results before @var{parser} ever gets called. + +If @var{key} is @code{ARGP_KEY_ARG}, @var{arg} is a non-option +argument. Other special keys always have a zero @var{arg}. + +@item state +@var{state} points to a @code{struct argp_state}, containing useful +information about the current parsing state for use by +@var{parser}. @xref{Argp Parsing State}. 
+@end table + +When @var{parser} is called, it should perform whatever action is +appropriate for @var{key}, and return @code{0} for success, +@code{ARGP_ERR_UNKNOWN} if the value of @var{key} is not handled by this +parser function, or a unix error code if a real error +occurred. @xref{Error Codes}. + +@comment argp.h +@comment GNU +@deftypevr Macro int ARGP_ERR_UNKNOWN +Argp parser functions should return @code{ARGP_ERR_UNKNOWN} for any +@var{key} value they do not recognize, or for non-option arguments +(@code{@var{key} == ARGP_KEY_ARG}) that they are not equipped to handle. +@end deftypevr + +@need 3000 +A typical parser function uses a switch statement on @var{key}: + +@smallexample +error_t +parse_opt (int key, char *arg, struct argp_state *state) +@{ + switch (key) + @{ + case @var{option_key}: + @var{action} + break; + @dots{} + default: + return ARGP_ERR_UNKNOWN; + @} + return 0; +@} +@end smallexample + +@menu +* Keys: Argp Special Keys. Special values for the @var{key} argument. +* State: Argp Parsing State. What the @var{state} argument refers to. +* Functions: Argp Helper Functions. Functions to help during argp parsing. +@end menu + +@node Argp Special Keys, Argp Parsing State, , Argp Parser Functions +@subsubsection Special Keys for Argp Parser Functions + +In addition to key values corresponding to user options, the @var{key} +argument to argp parser functions may have a number of other special +values. In the following example @var{arg} and @var{state} refer to +parser function arguments. @xref{Argp Parser Functions}. + +@vtable @code +@comment argp.h +@comment GNU +@item ARGP_KEY_ARG +This is not an option at all, but rather a command line argument, whose +value is pointed to by @var{arg}. + +When there are multiple parser functions in play due to argp parsers +being combined, it's impossible to know which one will handle a specific +argument. 
Each is called until one returns 0 or an error other than +@code{ARGP_ERR_UNKNOWN}; if an argument is not handled, +@code{argp_parse} immediately returns success, without parsing any more +arguments. + +Once a parser function returns success for this key, that fact is +recorded, and the @code{ARGP_KEY_NO_ARGS} case won't be +used. @emph{However}, if while processing the argument a parser function +decrements the @code{next} field of its @var{state} argument, the option +won't be considered processed; this is to allow you to actually modify +the argument, perhaps into an option, and have it processed again. + +@comment argp.h +@comment GNU +@item ARGP_KEY_ARGS +If a parser function returns @code{ARGP_ERR_UNKNOWN} for +@code{ARGP_KEY_ARG}, it is immediately called again with the key +@code{ARGP_KEY_ARGS}, which has a similar meaning, but is slightly more +convenient for consuming all remaining arguments. @var{arg} is 0, and +the tail of the argument vector may be found at @code{@var{state}->argv ++ @var{state}->next}. If success is returned for this key, and +@code{@var{state}->next} is unchanged, all remaining arguments are +considered to have been consumed. Otherwise, the amount by which +@code{@var{state}->next} has been adjusted indicates how many were used. +Here's an example that uses both, for different args: + + +@smallexample +@dots{} +case ARGP_KEY_ARG: + if (@var{state}->arg_num == 0) + /* First argument */ + first_arg = @var{arg}; + else + /* Let the next case parse it. */ + return ARGP_ERR_UNKNOWN; + break; +case ARGP_KEY_ARGS: + remaining_args = @var{state}->argv + @var{state}->next; + num_remaining_args = @var{state}->argc - @var{state}->next; + break; +@end smallexample + +@comment argp.h +@comment GNU +@item ARGP_KEY_END +This indicates that there are no more command line arguments. Parser +functions are called in a different order, children first. This allows +each parser to clean up its state for the parent.
+ +@comment argp.h +@comment GNU +@item ARGP_KEY_NO_ARGS +Because it's common to do some special processing if there aren't any +non-option args, parser functions are called with this key if they +didn't successfully process any non-option arguments. This is called +just before @code{ARGP_KEY_END}, where more general validity checks on +previously parsed arguments take place. + +@comment argp.h +@comment GNU +@item ARGP_KEY_INIT +This is passed in before any parsing is done. Afterwards, the values of +each element of the @code{child_input} field of @var{state}, if any, are +copied to each child's state to be the initial value of the @code{input} +when @emph{their} parsers are called. + +@comment argp.h +@comment GNU +@item ARGP_KEY_SUCCESS +Passed in when parsing has successfully been completed, even if +arguments remain. + +@comment argp.h +@comment GNU +@item ARGP_KEY_ERROR +Passed in if an error has occurred and parsing is terminated. In this +case a call with a key of @code{ARGP_KEY_SUCCESS} is never made. + +@comment argp.h +@comment GNU +@item ARGP_KEY_FINI +The final key ever seen by any parser, even after +@code{ARGP_KEY_SUCCESS} and @code{ARGP_KEY_ERROR}. Any resources +allocated by @code{ARGP_KEY_INIT} may be freed here. At times, certain +resources allocated are to be returned to the caller after a successful +parse. In that case, those particular resources can be freed in the +@code{ARGP_KEY_ERROR} case. +@end vtable + +In all cases, @code{ARGP_KEY_INIT} is the first key seen by parser +functions, and @code{ARGP_KEY_FINI} the last, unless an error was +returned by the parser for @code{ARGP_KEY_INIT}. Other keys can occur +in one of the following orders. @var{opt} refers to an arbitrary option +key: + +@table @asis +@item @var{opt}@dots{} @code{ARGP_KEY_NO_ARGS} @code{ARGP_KEY_END} @code{ARGP_KEY_SUCCESS} +The arguments being parsed did not contain any non-option arguments.
+ +@item ( @var{opt} | @code{ARGP_KEY_ARG} )@dots{} @code{ARGP_KEY_END} @code{ARGP_KEY_SUCCESS} +All non-option arguments were successfully handled by a parser +function. There may be multiple parser functions if multiple argp +parsers were combined. + +@item ( @var{opt} | @code{ARGP_KEY_ARG} )@dots{} @code{ARGP_KEY_SUCCESS} +Some non-option argument went unrecognized. + +This occurs when every parser function returns @code{ARGP_ERR_UNKNOWN} +for an argument, in which case parsing stops at that argument if +@var{arg_index} is a non-null pointer. Otherwise an error occurs. +@end table + +In all cases, if a non-null value for @var{arg_index} gets passed to +@code{argp_parse}, the index of the first unparsed command-line argument +is passed back in that value. + +If an error occurs and is either detected by argp or because a parser +function returned an error value, each parser is called with +@code{ARGP_KEY_ERROR}. No further calls are made, except the final call +with @code{ARGP_KEY_FINI}. + +@node Argp Parsing State, Argp Helper Functions, Argp Special Keys, Argp Parser Functions +@subsubsection Argp Parsing State + +The third argument to argp parser functions (@pxref{Argp Parser +Functions}) is a pointer to a @code{struct argp_state}, which contains +information about the state of the option parsing. + +@comment argp.h +@comment GNU +@deftp {Data Type} {struct argp_state} +This structure has the following fields, which may be modified as noted: + +@table @code +@item const struct argp *const root_argp +The top level argp parser being parsed. Note that this is often +@emph{not} the same @code{struct argp} passed into @code{argp_parse} by +the invoking program. @xref{Argp}. It is an internal argp parser that +contains options implemented by @code{argp_parse} itself, such as +@samp{--help}. + +@item int argc +@itemx char **argv +The argument vector being parsed. This may be modified. + +@item int next +The index in @code{argv} of the next argument to be parsed.
This may be +modified. + +One way to consume all remaining arguments in the input is to set +@code{@var{state}->next = @var{state}->argc}, perhaps after recording +the value of the @code{next} field to find the consumed arguments. The +current option can be re-parsed immediately by decrementing this field, +then modifying @code{@var{state}->argv[@var{state}->next]} to reflect +the option that should be reexamined. + +@item unsigned flags +The flags supplied to @code{argp_parse}. These may be modified, although +some flags may only take effect when @code{argp_parse} is first +invoked. @xref{Argp Flags}. + +@item unsigned arg_num +While calling a parsing function with the @var{key} argument +@code{ARGP_KEY_ARG}, this represents the number of the current arg, +starting at 0. It is incremented after each @code{ARGP_KEY_ARG} call +returns. At all other times, this is the number of @code{ARGP_KEY_ARG} +arguments that have been processed. + +@item int quoted +If non-zero, the index in @code{argv} of the first argument following a +special @samp{--} argument. This prevents anything that follows from +being interpreted as an option. It is only set after argument parsing +has proceeded past this point. + +@item void *input +An arbitrary pointer passed in from the caller of @code{argp_parse}, in +the @var{input} argument. + +@item void **child_inputs +These are values that will be passed to child parsers. This vector will +be the same length as the number of children in the current parser. Each +child parser will be given the value of +@code{@var{state}->child_inputs[@var{i}]} as @emph{its} +@code{@var{state}->input} field, where @var{i} is the index of the child +in this parser's @code{children} field. @xref{Argp Children}. + +@item void *hook +For the parser function's use. Initialized to 0, but otherwise ignored +by argp. + +@item char *name +The name used when printing messages.
This is initialized to +@code{argv[0]}, or @code{program_invocation_name} if @code{argv[0]} is +unavailable. + +@item FILE *err_stream +@itemx FILE *out_stream +The stdio streams used when argp prints. Error messages are printed to +@code{err_stream}, all other output (such as @samp{--help} output) to +@code{out_stream}. These are initialized to @code{stderr} and +@code{stdout} respectively. @xref{Standard Streams}. + +@item void *pstate +Private, for use by the argp implementation. +@end table +@end deftp + +@node Argp Helper Functions, , Argp Parsing State, Argp Parser Functions +@subsubsection Functions For Use in Argp Parsers + +Argp provides a number of functions available to the user of argp +(@pxref{Argp Parser Functions}), mostly for producing error messages. +These take as their first argument the @var{state} argument to the +parser function. @xref{Argp Parsing State}. + + +@cindex usage messages, in argp +@comment argp.h +@comment GNU +@deftypefun void argp_usage (const struct argp_state *@var{state}) +@safety{@prelim{}@mtunsafe{@mtasurace{:argpbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @ascuintl{} @asucorrupt{}}@acunsafe{@acsmem{} @acucorrupt{} @aculock{}}} +@c Just calls argp_state_help with stderr and ARGP_HELP_STD_USAGE. +Outputs the standard usage message for the argp parser referred to by +@var{state} to @code{@var{state}->err_stream} and terminates the program +with @code{exit (argp_err_exit_status)}. @xref{Argp Global Variables}.
+@end deftypefun + +@cindex syntax error messages, in argp +@comment argp.h +@comment GNU +@deftypefun void argp_error (const struct argp_state *@var{state}, const char *@var{fmt}, @dots{}) +@safety{@prelim{}@mtunsafe{@mtasurace{:argpbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @ascuintl{} @asucorrupt{}}@acunsafe{@acsmem{} @acucorrupt{} @aculock{}}} +@c Lock stream, vasprintf the formatted message into a buffer, print the +@c buffer prefixed by the short program name (in libc, +@c argp_short_program_name is a macro that expands to +@c program_invocation_short_name), releases the buffer, then call +@c argp_state_help with stream and ARGP_HELP_STD_ERR, unlocking the +@c stream at the end. +Prints the printf format string @var{fmt} and following args, preceded +by the program name and @samp{:}, and followed by a @w{@samp{Try @dots{} +--help}} message, and terminates the program with an exit status of +@code{argp_err_exit_status}. @xref{Argp Global Variables}. +@end deftypefun + +@cindex error messages, in argp +@comment argp.h +@comment GNU +@deftypefun void argp_failure (const struct argp_state *@var{state}, int @var{status}, int @var{errnum}, const char *@var{fmt}, @dots{}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{}}} +@c Lock stream, write out the short program name, vasprintf the optional +@c formatted message to a buffer, print the buffer prefixed by colon and +@c blank, release the buffer, call strerror_r with an automatic buffer, +@c print it out after colon and blank, put[w]c a line break, unlock the +@c stream, then exit unless ARGP_NO_EXIT. +Similar to the standard GNU error-reporting function @code{error}, this +prints the program name and @samp{:}, the printf format string +@var{fmt}, and the appropriate following args. If @var{errnum} is +non-zero, the standard Unix error text for it is printed. If @var{status} is +non-zero, it terminates the program with that value as its exit status.
+ +The difference between @code{argp_failure} and @code{argp_error} is that +@code{argp_error} is for @emph{parsing errors}, whereas +@code{argp_failure} is for other problems that occur during parsing but +don't reflect a syntactic problem with the input, such as illegal values +for options, bad phase of the moon, etc. +@end deftypefun + +@comment argp.h +@comment GNU +@deftypefun void argp_state_help (const struct argp_state *@var{state}, FILE *@var{stream}, unsigned @var{flags}) +@safety{@prelim{}@mtunsafe{@mtasurace{:argpbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @ascuintl{} @asucorrupt{}}@acunsafe{@acsmem{} @acucorrupt{} @aculock{}}} +@c Just calls _help with the short program name and optionally exit. +@c The main problems in _help, besides the usual issues with stream I/O +@c and translation, are the use of a static buffer (uparams, thus +@c @mtasurace:argpbuf) that makes the whole thing thread-unsafe, reading +@c from the environment for ARGP_HELP_FMT, accessing the locale object +@c multiple times. 
+ +@c _help @mtsenv @mtasurace:argpbuf @mtslocale @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c dgettext @ascuintl +@c flockfile @aculock +@c funlockfile @aculock +@c fill_in_uparams @mtsenv @mtasurace:argpbuf @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c argp_failure dup (status = errnum = 0) +@c atoi dup @mtslocale +@c argp_hol @ascuheap @acsmem +@c make_hol @ascuheap @acsmem +@c hol_add_cluster @ascuheap @acsmem +@c hol_append @ascuheap @acsmem +@c hol_set_group ok +@c hol_find_entry ok +@c hol_sort @mtslocale @acucorrupt +@c qsort dup @acucorrupt +@c hol_entry_qcmp @mtslocale +@c hol_entry_cmp @mtslocale +@c group_cmp ok +@c hol_cluster_cmp ok +@c group_cmp ok +@c hol_entry_first_short @mtslocale +@c hol_entry_short_iterate [@mtslocale] +@c until_short ok +@c oshort ok +@c isprint ok +@c odoc ok +@c hol_entry_first_long ok +@c canon_doc_option @mtslocale +@c tolower dup +@c hol_usage @mtslocale @ascuintl @ascuheap @acsmem +@c hol_entry_short_iterate ok +@c add_argless_short_opt ok +@c argp_fmtstream_printf dup +@c hol_entry_short_iterate @mtslocale @ascuintl @ascuheap @acsmem +@c usage_argful_short_opt @mtslocale @ascuintl @ascuheap @acsmem +@c dgettext dup +@c argp_fmtstream_printf dup +@c hol_entry_long_iterate @mtslocale @ascuintl @ascuheap @acsmem +@c usage_long_opt @mtslocale @ascuintl @ascuheap @acsmem +@c dgettext dup +@c argp_fmtstream_printf dup +@c hol_help @mtslocale @mtasurace:argpbuf @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c hol_entry_help @mtslocale @mtasurace:argpbuf @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_set_lmargin dup +@c argp_fmtstream_wmargin dup +@c argp_fmtstream_set_wmargin dup +@c comma @mtslocale @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_putc dup +@c hol_cluster_is_child ok +@c argp_fmtstream_wmargin dup +@c print_header dup +@c argp_fmtstream_set_wmargin dup +@c argp_fmtstream_puts dup +@c 
indent_to dup +@c argp_fmtstream_putc dup +@c arg @mtslocale @ascuheap @acsmem +@c argp_fmtstream_printf dup +@c odoc dup +@c argp_fmtstream_puts dup +@c argp_fmtstream_printf dup +@c print_header @mtslocale @mtasurace:argpbuf @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c dgettext dup +@c filter_doc dup +@c argp_fmtstream_putc dup +@c indent_to dup +@c argp_fmtstream_set_lmargin dup +@c argp_fmtstream_set_wmargin dup +@c argp_fmtstream_puts dup +@c free dup +@c filter_doc dup +@c argp_fmtstream_point dup +@c indent_to @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_point dup +@c argp_fmtstream_putc dup +@c dgettext dup +@c filter_doc dup +@c argp_fmtstream_putc dup +@c argp_fmtstream_puts dup +@c free dup +@c hol_free @ascuheap @acsmem +@c free dup +@c argp_args_levels ok +@c argp_args_usage @mtslocale @ascuintl @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c dgettext dup +@c filter_doc ok +@c argp_input ok +@c argp->help_filter +@c space @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_point dup +@c argp_fmtstream_rmargin @mtslocale @asucorrupt @acucorrupt @aculock +@c argp_fmtstream_update dup +@c argp_fmtstream_putc dup +@c argp_fmtstream_write dup +@c free dup +@c argp_doc @mtslocale @ascuheap @ascuintl @asucorrupt @acsmem @acucorrupt @aculock +@c dgettext @ascuintl +@c strndup @ascuheap @acsmem +@c argp_input dup +@c argp->help_filter +@c argp_fmtstream_putc @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_ensure dup +@c argp_fmtstream_write dup +@c argp_fmtstream_puts dup +@c argp_fmtstream_point @mtslocale @asucorrupt @acucorrupt @aculock +@c argp_fmtstream_update dup +@c argp_fmtstream_lmargin dup +@c free dup +@c argp_make_fmtstream @ascuheap @acsmem +@c argp_fmtstream_free @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_update @mtslocale @asucorrupt @acucorrupt @aculock +@c put[w]c_unlocked dup +@c 
isblank in loop @mtslocale +@c fxprintf @aculock +@c fxprintf @aculock +@c free dup +@c argp_fmtstream_set_wmargin @mtslocale @asucorrupt @acucorrupt @aculock +@c argp_fmtstream_update dup +@c argp_fmtstream_printf @mtslocale @ascuheap @acsmem +@c argp_fmtstream_ensure dup +@c vsnprintf dup +@c argp_fmtstream_set_lmargin @mtslocale @asucorrupt @acucorrupt @aculock +@c argp_fmtstream_update dup +@c argp_fmtstream_puts @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_write @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_ensure @mtslocale @ascuheap @asucorrupt @acsmem @acucorrupt @aculock +@c argp_fmtstream_update dup +@c fxprintf @aculock +@c realloc @ascuheap @acsmem +Outputs a help message for the argp parser referred to by @var{state}, +to @var{stream}. The @var{flags} argument determines what sort of help +message is produced. @xref{Argp Help Flags}. +@end deftypefun + +Error output is sent to @code{@var{state}->err_stream}, and the program +name printed is @code{@var{state}->name}. + +The output or program termination behavior of these functions may be +suppressed if the @code{ARGP_NO_EXIT} or @code{ARGP_NO_ERRS} flags are +passed to @code{argp_parse}. @xref{Argp Flags}. + +This behavior is useful if an argp parser is exported for use by other +programs (e.g., by a library), and may be used in a context where it is +not desirable to terminate the program in response to parsing errors. In +argp parsers intended for such general use, and for the case where the +program @emph{doesn't} terminate, calls to any of these functions should +be followed by code that returns the appropriate error code: + +@smallexample +if (@var{bad argument syntax}) + @{ + argp_usage (@var{state}); + return EINVAL; + @} +@end smallexample + +@noindent +If a parser function will @emph{only} be used when @code{ARGP_NO_EXIT} +is not set, the return may be omitted. 
+ +@node Argp Children, Argp Help Filtering, Argp Parser Functions, Argp Parsers +@subsection Combining Multiple Argp Parsers + +The @code{children} field in a @code{struct argp} enables other argp +parsers to be combined with the referencing one for the parsing of a +single set of arguments. This field should point to a vector of +@code{struct argp_child}, which is terminated by an entry having a value +of zero in the @code{argp} field. + +Where conflicts between combined parsers arise, as when two specify an +option with the same name, the parser conflicts are resolved in favor of +the parent argp parser(s), or the earlier of the argp parsers in the +list of children. + +@comment argp.h +@comment GNU +@deftp {Data Type} {struct argp_child} +An entry in the list of subsidiary argp parsers pointed to by the +@code{children} field in a @code{struct argp}. The fields are as +follows: + +@table @code +@item const struct argp *argp +The child argp parser, or zero to end the list. + +@item int flags +Flags for this child. + +@item const char *header +If non-zero, this is an optional header to be printed within help output +before the child options. As a side-effect, a non-zero value forces the +child options to be grouped together. To achieve this effect without +actually printing a header string, use a value of @code{""}. As with +header strings specified in an option entry, the conventional value of +the last character is @samp{:}. @xref{Argp Option Vectors}. + +@item int group +This is where the child options are grouped relative to the other +`consolidated' options in the parent argp parser. The values are the +same as the @code{group} field in @code{struct argp_option}. @xref{Argp +Option Vectors}. All child-groupings follow parent options at a +particular group level. If both this field and @code{header} are zero, +then the child's options aren't grouped together; they are merged with +parent options at the parent option group level.
+ +@end table +@end deftp + +@node Argp Flags, Argp Help, Argp Parsers, Argp +@subsection Flags for @code{argp_parse} + +The default behavior of @code{argp_parse} is designed to be convenient +for the most common case of parsing program command line arguments. To +modify these defaults, the following flags may be or'd together in the +@var{flags} argument to @code{argp_parse}: + +@vtable @code +@comment argp.h +@comment GNU +@item ARGP_PARSE_ARGV0 +Don't ignore the first element of the @var{argv} argument to +@code{argp_parse}. Unless @code{ARGP_NO_ERRS} is set, the first element +of the argument vector is skipped for option parsing purposes, as it +corresponds to the program name in a command line. + +@comment argp.h +@comment GNU +@item ARGP_NO_ERRS +Don't print error messages for unknown options to @code{stderr}; unless +this flag is set, @code{ARGP_PARSE_ARGV0} is ignored, as @code{argv[0]} +is used as the program name in the error messages. This flag implies +@code{ARGP_NO_EXIT}. This is based on the assumption that silent exiting +upon errors is bad behavior. + +@comment argp.h +@comment GNU +@item ARGP_NO_ARGS +Don't parse any non-option args. Normally these are parsed by calling +the parse functions with a key of @code{ARGP_KEY_ARG}, the actual +argument being the value. This flag needn't normally be set, as the +default behavior is to stop parsing as soon as an argument fails to be +parsed. @xref{Argp Parser Functions}. + +@comment argp.h +@comment GNU +@item ARGP_IN_ORDER +Parse options and arguments in the same order they occur on the command +line. Normally they're rearranged so that all options come first. + +@comment argp.h +@comment GNU +@item ARGP_NO_HELP +Don't provide the standard long option @samp{--help}, which ordinarily +causes usage and option help information to be output to @code{stdout} +and @code{exit (0)}. + +@comment argp.h +@comment GNU +@item ARGP_NO_EXIT +Don't exit on errors, although they may still result in error messages.
+ +@comment argp.h +@comment GNU +@item ARGP_LONG_ONLY +Use the GNU getopt `long-only' rules for parsing arguments. This allows +long-options to be recognized with only a single @samp{-} +(e.g., @samp{-help}). This results in a less useful interface, and its +use is discouraged as it conflicts with the way most GNU programs work +as well as the GNU coding standards. + +@comment argp.h +@comment GNU +@item ARGP_SILENT +Turns off all message printing and exiting; this is equivalent to setting +@code{ARGP_NO_EXIT}, @code{ARGP_NO_ERRS}, and @code{ARGP_NO_HELP} together. +@end vtable + +@node Argp Help Filtering, , Argp Children, Argp Parsers +@need 2000 +@subsection Customizing Argp Help Output + +The @code{help_filter} field in a @code{struct argp} is a pointer to a +function that filters the text of help messages before displaying +them. Such a function has a signature like: + +@smallexample +char *@var{help-filter} (int @var{key}, const char *@var{text}, void *@var{input}) +@end smallexample + + +@noindent +Here @var{key} is either a key from an option, in which case @var{text} +is that option's help text. @xref{Argp Option Vectors}. Alternatively, one +of the special keys with names beginning with @samp{ARGP_KEY_HELP_} +might be used, describing which other help text @var{text} will contain. +@xref{Argp Help Filter Keys}. + +The function should return either @var{text} if it remains as-is, a +replacement string allocated using @code{malloc}, which will be +freed by argp, or zero, which prints nothing. The value of @var{text} is +supplied @emph{after} any translation has been done, so if any of the +replacement text needs translation, it must be done by the filter +function. @var{input} is either the input supplied to @code{argp_parse} +or it is zero, if @code{argp_help} was called directly by the user. + +@menu +* Keys: Argp Help Filter Keys. Special @var{key} values for help filter functions.
+@end menu + +@node Argp Help Filter Keys, , , Argp Help Filtering +@subsubsection Special Keys for Argp Help Filter Functions + +The following special values may be passed to an argp help filter +function as the first argument in addition to key values for user +options. They specify which help text the @var{text} argument contains: + +@vtable @code +@comment argp.h +@comment GNU +@item ARGP_KEY_HELP_PRE_DOC +The help text preceding options. + +@comment argp.h +@comment GNU +@item ARGP_KEY_HELP_POST_DOC +The help text following options. + +@comment argp.h +@comment GNU +@item ARGP_KEY_HELP_HEADER +The option header string. + +@comment argp.h +@comment GNU +@item ARGP_KEY_HELP_EXTRA +This is used after all other documentation; @var{text} is zero for this key. + +@comment argp.h +@comment GNU +@item ARGP_KEY_HELP_DUP_ARGS_NOTE +The explanatory note printed when duplicate option arguments have been suppressed. + +@comment argp.h +@comment GNU +@item ARGP_KEY_HELP_ARGS_DOC +The argument doc string; formally the @code{args_doc} field from the argp parser. @xref{Argp Parsers}. +@end vtable + +@node Argp Help, Argp Examples, Argp Flags, Argp +@subsection The @code{argp_help} Function + +Normally programs using argp need not be written with the printing of +argument-usage help messages in mind, as the standard +@samp{--help} option is handled automatically by argp. Typical error +cases can be handled using @code{argp_usage} and +@code{argp_error}. @xref{Argp Helper Functions}. However, if it's +desirable to print a help message in some context other than parsing the +program options, argp offers the @code{argp_help} interface. + +@comment argp.h +@comment GNU +@deftypefun void argp_help (const struct argp *@var{argp}, FILE *@var{stream}, unsigned @var{flags}, char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:argpbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @ascuintl{} @asucorrupt{}}@acunsafe{@acsmem{} @acucorrupt{} @aculock{}}} +@c Just calls _help.
+This outputs a help message for the argp parser @var{argp} to +@var{stream}. The type of messages printed will be determined by +@var{flags}. + +Any options such as @samp{--help} that are implemented automatically by +argp itself will @emph{not} be present in the help output; for this +reason it is best to use @code{argp_state_help} if calling from within +an argp parser function. @xref{Argp Helper Functions}. +@end deftypefun + +@menu +* Flags: Argp Help Flags. Specifying what sort of help message to print. +@end menu + +@node Argp Help Flags, , , Argp Help +@subsection Flags for the @code{argp_help} Function + +When calling @code{argp_help} (@pxref{Argp Help}) or +@code{argp_state_help} (@pxref{Argp Helper Functions}) the exact output +is determined by the @var{flags} argument. This should consist of any of +the following flags, or'd together: + +@vtable @code +@item ARGP_HELP_USAGE +A unix @samp{Usage:} message that explicitly lists all options. + +@item ARGP_HELP_SHORT_USAGE +A unix @samp{Usage:} message that displays an appropriate placeholder to +indicate where the options go; useful for showing the non-option +argument syntax. + +@item ARGP_HELP_SEE +A @samp{Try @dots{} for more help} message; @samp{@dots{}} contains the +program name and @samp{--help}. + +@item ARGP_HELP_LONG +A verbose option help message that gives each option available along +with its documentation string. + +@item ARGP_HELP_PRE_DOC +The part of the argp parser doc string preceding the verbose option help. + +@item ARGP_HELP_POST_DOC +The part of the argp parser doc string that follows the verbose option help. + +@item ARGP_HELP_DOC +@code{(ARGP_HELP_PRE_DOC | ARGP_HELP_POST_DOC)} + +@item ARGP_HELP_BUG_ADDR +A message that prints where to report bugs for this program, if the +@code{argp_program_bug_address} variable contains this information. + +@item ARGP_HELP_LONG_ONLY +This will modify any output to reflect the @code{ARGP_LONG_ONLY} mode.
+@end vtable + +The following flags are only understood when used with +@code{argp_state_help}. They control whether the function returns after +printing its output, or terminates the program: + +@vtable @code +@item ARGP_HELP_EXIT_ERR +This will terminate the program with @code{exit (argp_err_exit_status)}. + +@item ARGP_HELP_EXIT_OK +This will terminate the program with @code{exit (0)}. +@end vtable + +The following flags are combinations of the basic flags for printing +standard messages: + +@vtable @code +@item ARGP_HELP_STD_ERR +Assuming that an error message for a parsing error has been printed, this +prints a message on how to get help, and terminates the program with an +error. + +@item ARGP_HELP_STD_USAGE +This prints a standard usage message and terminates the program with an +error. This is used when no other specific error messages are +appropriate or available. + +@item ARGP_HELP_STD_HELP +This prints the standard response for a @samp{--help} option, and +terminates the program successfully. +@end vtable + +@node Argp Examples, Argp User Customization, Argp Help, Argp +@subsection Argp Examples + +These example programs demonstrate the basic usage of argp. + +@menu +* 1: Argp Example 1. A minimal program using argp. +* 2: Argp Example 2. A program using only default options. +* 3: Argp Example 3. A simple program with user options. +* 4: Argp Example 4. Combining multiple argp parsers. +@end menu + +@node Argp Example 1, Argp Example 2, , Argp Examples +@subsubsection A Minimal Program Using Argp + +This is perhaps the smallest program possible that uses argp. It won't +do much except give an error message and exit when there are any +arguments, and print a rather pointless message for @samp{--help}.
+ +@smallexample +@include argp-ex1.c.texi +@end smallexample + +@node Argp Example 2, Argp Example 3, Argp Example 1, Argp Examples +@subsubsection A Program Using Argp with Only Default Options + +This program doesn't use any options or arguments; it uses argp to be +compliant with the GNU standard command line format. + +In addition to accepting no arguments and implementing a @samp{--help} +option, this example has a @samp{--version} option, which will put the +given documentation string and bug address in the @samp{--help} output, +as per GNU standards. + +The variable @code{argp} contains the argument parser +specification. Adding fields to this structure is the way most +parameters are passed to @code{argp_parse}. The first three fields are +normally used, but they are not in this small program. There are also +two global variables that argp can use defined here, +@code{argp_program_version} and @code{argp_program_bug_address}. They +are considered global variables because they will almost always be +constant for a given program, even if it uses different argument +parsers for various tasks. + +@smallexample +@include argp-ex2.c.texi +@end smallexample + +@node Argp Example 3, Argp Example 4, Argp Example 2, Argp Examples +@subsubsection A Program Using Argp with User Options + +This program uses the same features as example 2, adding user options +and arguments. + +We now use the first four fields in @code{argp} (@pxref{Argp Parsers}) +and specify @code{parse_opt} as the parser function. @xref{Argp Parser +Functions}. + +Note that in this example, @code{main} uses a structure to communicate +with the @code{parse_opt} function, a pointer to which it passes in the +@code{input} argument to @code{argp_parse}. @xref{Argp}. It is retrieved +by @code{parse_opt} through the @code{input} field in its @code{state} +argument. @xref{Argp Parsing State}.
Of course, it's also possible to +use global variables instead, but using a structure like this is +somewhat more flexible and clean. + +@smallexample +@include argp-ex3.c.texi +@end smallexample + +@node Argp Example 4, , Argp Example 3, Argp Examples +@subsubsection A Program Using Multiple Combined Argp Parsers + +This program uses the same features as example 3, but has more options, +and presents more structure in the @samp{--help} output. It also +illustrates how you can `steal' the remainder of the input arguments +past a certain point for programs that accept a list of items. It also +illustrates the @var{key} value @code{ARGP_KEY_NO_ARGS}, which is only +given if no non-option arguments were supplied to the +program. @xref{Argp Special Keys}. + +For structuring help output, two features are used: @emph{headers} and a +two-part documentation string. The @emph{headers} are entries in the options +vector. @xref{Argp Option Vectors}. Their first four fields are zero. The +two-part documentation string is in the variable @code{doc}, which +allows documentation both before and after the options. @xref{Argp +Parsers}. The two parts of @code{doc} are separated by a vertical-tab +character (@code{'\v'}, or @code{'\013'}). By convention, the +documentation before the options is a short string stating what the +program does, and after any options it is longer, describing the +behavior in more detail. All documentation strings are automatically +filled for output, although newlines may be included to force a line +break at a particular point. In addition, documentation strings are +passed to the @code{gettext} function, for possible translation into the +current locale.
+ +@smallexample +@include argp-ex4.c.texi +@end smallexample + +@node Argp User Customization, , Argp Examples, Argp +@subsection Argp User Customization + +@cindex ARGP_HELP_FMT environment variable +The formatting of argp @samp{--help} output may be controlled to some +extent by a program's users, by setting the @code{ARGP_HELP_FMT} +environment variable to a comma-separated list of tokens. Whitespace is +ignored: + +@table @samp +@item dup-args +@itemx no-dup-args +These turn @dfn{duplicate-argument-mode} on or off. In duplicate +argument mode, if an option that accepts an argument has multiple names, +the argument is shown for each name. Otherwise, it is only shown for the +first long option. A note is subsequently printed so the user knows that +it applies to other names as well. The default is @samp{no-dup-args}, +which is less consistent, but prettier. + +@item dup-args-note +@itemx no-dup-args-note +These will enable or disable the note informing the user of suppressed +option argument duplication. The default is @samp{dup-args-note}. + +@item short-opt-col=@var{n} +This prints the first short option in column @var{n}. The default is 2. + +@item long-opt-col=@var{n} +This prints the first long option in column @var{n}. The default is 6. + +@item doc-opt-col=@var{n} +This prints `documentation options' (@pxref{Argp Option Flags}) in +column @var{n}. The default is 2. + +@item opt-doc-col=@var{n} +This prints the documentation for options starting in column +@var{n}. The default is 29. + +@item header-col=@var{n} +This will indent the group headers that document groups of options to +column @var{n}. The default is 1. + +@item usage-indent=@var{n} +This will indent continuation lines in @samp{Usage:} messages to column +@var{n}. The default is 12. + +@item rmargin=@var{n} +This will word wrap help output at or before column @var{n}. The default +is 79.
+@end table diff --git a/REORG.TODO/manual/arith.texi b/REORG.TODO/manual/arith.texi new file mode 100644 index 0000000000..dec12a06ae --- /dev/null +++ b/REORG.TODO/manual/arith.texi @@ -0,0 +1,3227 @@ +@node Arithmetic, Date and Time, Mathematics, Top +@c %MENU% Low level arithmetic functions +@chapter Arithmetic Functions + +This chapter contains information about functions for doing basic +arithmetic operations, such as splitting a float into its integer and +fractional parts or retrieving the imaginary part of a complex value. +These functions are declared in the header files @file{math.h} and +@file{complex.h}. + +@menu +* Integers:: Basic integer types and concepts +* Integer Division:: Integer division with guaranteed rounding. +* Floating Point Numbers:: Basic concepts. IEEE 754. +* Floating Point Classes:: The five kinds of floating-point number. +* Floating Point Errors:: When something goes wrong in a calculation. +* Rounding:: Controlling how results are rounded. +* Control Functions:: Saving and restoring the FPU's state. +* Arithmetic Functions:: Fundamental operations provided by the library. +* Complex Numbers:: The types. Writing complex constants. +* Operations on Complex:: Projection, conjugation, decomposition. +* Parsing of Numbers:: Converting strings to numbers. +* Printing of Floats:: Converting floating-point numbers to strings. +* System V Number Conversion:: An archaic way to convert numbers to strings. +@end menu + +@node Integers +@section Integers +@cindex integer + +The C language defines several integer data types: integer, short integer, +long integer, and character, all in both signed and unsigned varieties. +The GNU C compiler extends the language to contain long long integers +as well. +@cindex signedness + +The C integer types were intended to allow code to be portable among +machines with different inherent data sizes (word sizes), so each type +may have different ranges on different machines. 
The problem with +this is that a program often needs to be written for a particular range +of integers, and sometimes must be written for a particular size of +storage, regardless of what machine the program runs on. + +To address this problem, @theglibc{} contains C type definitions +you can use to declare integers that meet your exact needs. Because the +@glibcadj{} header files are customized to a specific machine, your +program source code doesn't have to be. + +These @code{typedef}s are in @file{stdint.h}. +@pindex stdint.h + +If you require that an integer be represented in exactly N bits, use one +of the following types, with the obvious mapping to bit size and signedness: + +@itemize @bullet +@item int8_t +@item int16_t +@item int32_t +@item int64_t +@item uint8_t +@item uint16_t +@item uint32_t +@item uint64_t +@end itemize + +If your C compiler and target machine do not allow integers of a certain +size, the corresponding above type does not exist. + +If you don't need a specific storage size, but want the smallest data +structure with @emph{at least} N bits, use one of these: + +@itemize @bullet +@item int_least8_t +@item int_least16_t +@item int_least32_t +@item int_least64_t +@item uint_least8_t +@item uint_least16_t +@item uint_least32_t +@item uint_least64_t +@end itemize + +If you don't need a specific storage size, but want the data structure +that allows the fastest access while having at least N bits (and +among data structures with the same access speed, the smallest one), use +one of these: + +@itemize @bullet +@item int_fast8_t +@item int_fast16_t +@item int_fast32_t +@item int_fast64_t +@item uint_fast8_t +@item uint_fast16_t +@item uint_fast32_t +@item uint_fast64_t +@end itemize + +If you want an integer with the widest range possible on the platform on +which it is being used, use one of the following. If you use these, +you should write code that takes into account the variable size and range +of the integer. 
+ +@itemize @bullet +@item intmax_t +@item uintmax_t +@end itemize + +@Theglibc{} also provides macros that tell you the maximum and +minimum possible values for each integer data type. The macro names +follow these examples: @code{INT32_MAX}, @code{UINT8_MAX}, +@code{INT_FAST32_MIN}, @code{INT_LEAST64_MIN}, @code{UINTMAX_MAX}, +@code{INTMAX_MAX}, @code{INTMAX_MIN}. Note that there are no macros for +unsigned integer minima. These are always zero. Similarly, there +are macros such as @code{INTMAX_WIDTH} for the width of these types. +Those macros for integer type widths come from TS 18661-1:2014. +@cindex maximum possible integer +@cindex minimum possible integer + +There are similar macros for use with C's built in integer types which +should come with your C compiler. These are described in @ref{Data Type +Measurements}. + +Don't forget you can use the C @code{sizeof} operator with any of these +data types to get the number of bytes of storage each uses. + + +@node Integer Division +@section Integer Division +@cindex integer division functions + +This section describes functions for performing integer division. These +functions are redundant when GNU CC is used, because in GNU C the +@samp{/} operator always rounds towards zero. But in other C +implementations, @samp{/} may round differently with negative arguments. +@code{div} and @code{ldiv} are useful because they specify how to round +the quotient: towards zero. The remainder has the same sign as the +numerator. + +These functions are specified to return a result @var{r} such that the value +@code{@var{r}.quot*@var{denominator} + @var{r}.rem} equals +@var{numerator}. + +@pindex stdlib.h +To use these facilities, you should include the header file +@file{stdlib.h} in your program. + +@comment stdlib.h +@comment ISO +@deftp {Data Type} div_t +This is a structure type used to hold the result returned by the @code{div} +function.
It has the following members: + +@table @code +@item int quot +The quotient from the division. + +@item int rem +The remainder from the division. +@end table +@end deftp + +@comment stdlib.h +@comment ISO +@deftypefun div_t div (int @var{numerator}, int @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Functions in this section are pure, and thus safe. +The function @code{div} computes the quotient and remainder from +the division of @var{numerator} by @var{denominator}, returning the +result in a structure of type @code{div_t}. + +If the result cannot be represented (as in a division by zero), the +behavior is undefined. + +Here is an example, albeit not a very useful one. + +@smallexample +div_t result; +result = div (20, -6); +@end smallexample + +@noindent +Now @code{result.quot} is @code{-3} and @code{result.rem} is @code{2}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftp {Data Type} ldiv_t +This is a structure type used to hold the result returned by the @code{ldiv} +function. It has the following members: + +@table @code +@item long int quot +The quotient from the division. + +@item long int rem +The remainder from the division. +@end table + +(This is identical to @code{div_t} except that the components are of +type @code{long int} rather than @code{int}.) +@end deftp + +@comment stdlib.h +@comment ISO +@deftypefun ldiv_t ldiv (long int @var{numerator}, long int @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{ldiv} function is similar to @code{div}, except that the +arguments are of type @code{long int} and the result is returned as a +structure of type @code{ldiv_t}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftp {Data Type} lldiv_t +This is a structure type used to hold the result returned by the @code{lldiv} +function. It has the following members: + +@table @code +@item long long int quot +The quotient from the division. 
+ +@item long long int rem +The remainder from the division. +@end table + +(This is identical to @code{div_t} except that the components are of +type @code{long long int} rather than @code{int}.) +@end deftp + +@comment stdlib.h +@comment ISO +@deftypefun lldiv_t lldiv (long long int @var{numerator}, long long int @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{lldiv} function is like the @code{div} function, but the +arguments are of type @code{long long int} and the result is returned as +a structure of type @code{lldiv_t}. + +The @code{lldiv} function was added in @w{ISO C99}. +@end deftypefun + +@comment inttypes.h +@comment ISO +@deftp {Data Type} imaxdiv_t +This is a structure type used to hold the result returned by the @code{imaxdiv} +function. It has the following members: + +@table @code +@item intmax_t quot +The quotient from the division. + +@item intmax_t rem +The remainder from the division. +@end table + +(This is identical to @code{div_t} except that the components are of +type @code{intmax_t} rather than @code{int}.) + +See @ref{Integers} for a description of the @code{intmax_t} type. + +@end deftp + +@comment inttypes.h +@comment ISO +@deftypefun imaxdiv_t imaxdiv (intmax_t @var{numerator}, intmax_t @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{imaxdiv} function is like the @code{div} function, but the +arguments are of type @code{intmax_t} and the result is returned as +a structure of type @code{imaxdiv_t}. + +See @ref{Integers} for a description of the @code{intmax_t} type. + +The @code{imaxdiv} function was added in @w{ISO C99}. +@end deftypefun + + +@node Floating Point Numbers +@section Floating Point Numbers +@cindex floating point +@cindex IEEE 754 +@cindex IEEE floating point + +Most computer hardware has support for two different kinds of numbers: +integers (@math{@dots{}-3, -2, -1, 0, 1, 2, 3@dots{}}) and +floating-point numbers. 
Floating-point numbers have three parts: the +@dfn{mantissa}, the @dfn{exponent}, and the @dfn{sign bit}. The real +number represented by a floating-point value is given by +@tex +$(s \mathrel? -1 \mathrel: 1) \cdot 2^e \cdot M$ +@end tex +@ifnottex +@math{(s ? -1 : 1) @mul{} 2^e @mul{} M} +@end ifnottex +where @math{s} is the sign bit, @math{e} the exponent, and @math{M} +the mantissa. @xref{Floating Point Concepts}, for details. (It is +possible to have a different @dfn{base} for the exponent, but all modern +hardware uses @math{2}.) + +Floating-point numbers can represent a finite subset of the real +numbers. While this subset is large enough for most purposes, it is +important to remember that the only reals that can be represented +exactly are rational numbers that have a terminating binary expansion +shorter than the width of the mantissa. Even simple fractions such as +@math{1/5} can only be approximated by floating point. + +Mathematical operations and functions frequently need to produce values +that are not representable. Often these values can be approximated +closely enough for practical purposes, but sometimes they can't. +Historically there was no way to tell when the results of a calculation +were inaccurate. Modern computers implement the @w{IEEE 754} standard +for numerical computations, which defines a framework for indicating to +the program when the results of calculation are not trustworthy. This +framework consists of a set of @dfn{exceptions} that indicate why a +result could not be represented, and the special values @dfn{infinity} +and @dfn{not a number} (NaN). + +@node Floating Point Classes +@section Floating-Point Number Classification Functions +@cindex floating-point classes +@cindex classes, floating-point +@pindex math.h + +@w{ISO C99} defines macros that let you determine what sort of +floating-point number a variable holds. 
+ +@comment math.h +@comment ISO +@deftypefn {Macro} int fpclassify (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is a generic macro which works on all floating-point types and +which returns a value of type @code{int}. The possible values are: + +@vtable @code +@item FP_NAN +The floating-point number @var{x} is ``Not a Number'' (@pxref{Infinity +and NaN}) +@item FP_INFINITE +The value of @var{x} is either plus or minus infinity (@pxref{Infinity +and NaN}) +@item FP_ZERO +The value of @var{x} is zero. In floating-point formats like @w{IEEE +754}, where zero can be signed, this value is also returned if +@var{x} is negative zero. +@item FP_SUBNORMAL +Numbers whose absolute value is too small to be represented in the +normal format are represented in an alternate, @dfn{denormalized} format +(@pxref{Floating Point Concepts}). This format is less precise but can +represent values closer to zero. @code{fpclassify} returns this value +for values of @var{x} in this alternate format. +@item FP_NORMAL +This value is returned for all other values of @var{x}. It indicates +that there is nothing special about the number. +@end vtable + +@end deftypefn + +@code{fpclassify} is most useful if more than one property of a number +must be tested. There are more specific macros which only test one +property at a time. Generally these macros execute faster than +@code{fpclassify}, since there is special hardware support for them. +You should therefore use the specific macros whenever possible. + +@comment math.h +@comment ISO +@deftypefn {Macro} int iscanonical (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +In some floating-point formats, some values have canonical (preferred) +and noncanonical encodings (for IEEE interchange binary formats, all +encodings are canonical). This macro returns a nonzero value if +@var{x} has a canonical encoding. It is from TS 18661-1:2014. 
+ +Note that some formats have multiple encodings of a value which are +all equally canonical; @code{iscanonical} returns a nonzero value for +all such encodings. Also, formats may have encodings that do not +correspond to any valid value of the type. In ISO C terms these are +@dfn{trap representations}; in @theglibc{}, @code{iscanonical} returns +zero for such encodings. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn {Macro} int isfinite (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if @var{x} is finite: not plus or +minus infinity, and not NaN. It is equivalent to + +@smallexample +(fpclassify (x) != FP_NAN && fpclassify (x) != FP_INFINITE) +@end smallexample + +@code{isfinite} is implemented as a macro which accepts any +floating-point type. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn {Macro} int isnormal (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if @var{x} is finite and normalized. +It is equivalent to + +@smallexample +(fpclassify (x) == FP_NORMAL) +@end smallexample +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn {Macro} int isnan (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if @var{x} is NaN. It is equivalent +to + +@smallexample +(fpclassify (x) == FP_NAN) +@end smallexample +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn {Macro} int issignaling (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if @var{x} is a signaling NaN +(sNaN). It is from TS 18661-1:2014. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn {Macro} int issubnormal (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if @var{x} is subnormal. It is +from TS 18661-1:2014. 
+@end deftypefn + +@comment math.h +@comment ISO +@deftypefn {Macro} int iszero (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if @var{x} is zero. It is from TS +18661-1:2014. +@end deftypefn + +Another set of floating-point classification functions was provided by +BSD. @Theglibc{} also supports these functions; however, we +recommend that you use the ISO C99 macros in new code. Those are standard +and will be available more widely. Also, since they are macros, you do +not have to worry about the type of their argument. + +@comment math.h +@comment BSD +@deftypefun int isinf (double @var{x}) +@comment math.h +@comment BSD +@deftypefunx int isinff (float @var{x}) +@comment math.h +@comment BSD +@deftypefunx int isinfl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns @code{-1} if @var{x} represents negative infinity, +@code{1} if @var{x} represents positive infinity, and @code{0} otherwise. +@end deftypefun + +@comment math.h +@comment BSD +@deftypefun int isnan (double @var{x}) +@comment math.h +@comment BSD +@deftypefunx int isnanf (float @var{x}) +@comment math.h +@comment BSD +@deftypefunx int isnanl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns a nonzero value if @var{x} is a ``not a number'' +value, and zero otherwise. + +@strong{NB:} The @code{isnan} macro defined by @w{ISO C99} overrides +the BSD function. This is normally not a problem, because the two +routines behave identically. 
However, if you really need to get the BSD +function for some reason, you can write + +@smallexample +(isnan) (x) +@end smallexample +@end deftypefun + +@comment math.h +@comment BSD +@deftypefun int finite (double @var{x}) +@comment math.h +@comment BSD +@deftypefunx int finitef (float @var{x}) +@comment math.h +@comment BSD +@deftypefunx int finitel (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns a nonzero value if @var{x} is neither infinite nor a ``not a +number'' value, and zero otherwise. +@end deftypefun + +@strong{Portability Note:} The functions listed in this section are BSD +extensions. + + +@node Floating Point Errors +@section Errors in Floating-Point Calculations + +@menu +* FP Exceptions:: IEEE 754 math exceptions and how to detect them. +* Infinity and NaN:: Special values returned by calculations. +* Status bit operations:: Checking for exceptions after the fact. +* Math Error Reporting:: How the math functions report errors. +@end menu + +@node FP Exceptions +@subsection FP Exceptions +@cindex exception +@cindex signal +@cindex zero divide +@cindex division by zero +@cindex inexact exception +@cindex invalid exception +@cindex overflow exception +@cindex underflow exception + +The @w{IEEE 754} standard defines five @dfn{exceptions} that can occur +during a calculation. Each corresponds to a particular sort of error, +such as overflow. + +When exceptions occur (when exceptions are @dfn{raised}, in the language +of the standard), one of two things can happen. By default the +exception is simply noted in the floating-point @dfn{status word}, and +the program continues as if nothing had happened. The operation +produces a default value, which depends on the exception (see the table +below). Your program can check the status word to find out which +exceptions happened. + +Alternatively, you can enable @dfn{traps} for exceptions.
In that case, +when an exception is raised, your program will receive the @code{SIGFPE} +signal. The default action for this signal is to terminate the +program. @xref{Signal Handling}, for how you can change the effect of +the signal. + +@findex matherr +In the System V math library, the user-defined function @code{matherr} +is called when certain exceptions occur inside math library functions. +However, the Unix98 standard deprecates this interface. We support it +for historical compatibility, but recommend that you do not use it in +new programs. When this interface is used, exceptions may not be +raised. + +@noindent +The exceptions defined in @w{IEEE 754} are: + +@table @samp +@item Invalid Operation +This exception is raised if the given operands are invalid for the +operation to be performed. Examples are +(see @w{IEEE 754}, @w{section 7}): +@enumerate +@item +Addition or subtraction: @math{@infinity{} - @infinity{}}. (But +@math{@infinity{} + @infinity{} = @infinity{}}). +@item +Multiplication: @math{0 @mul{} @infinity{}}. +@item +Division: @math{0/0} or @math{@infinity{}/@infinity{}}. +@item +Remainder: @math{x} REM @math{y}, where @math{y} is zero or @math{x} is +infinite. +@item +Square root if the operand is less than zero. More generally, any +mathematical function evaluated outside its domain produces this +exception. +@item +Conversion of a floating-point number to an integer or decimal +string, when the number cannot be represented in the target format (due +to overflow, infinity, or NaN). +@item +Conversion of an unrecognizable input string. +@item +Comparison via predicates involving @math{<} or @math{>}, when one or +other of the operands is NaN. You can prevent this exception by using +the unordered comparison functions instead; see @ref{FP Comparison Functions}. +@end enumerate + +If the exception does not trap, the result of the operation is NaN. 
+ +@item Division by Zero +This exception is raised when a finite nonzero number is divided +by zero. If no trap occurs the result is either @math{+@infinity{}} or +@math{-@infinity{}}, depending on the signs of the operands. + +@item Overflow +This exception is raised whenever the result cannot be represented +as a finite value in the precision format of the destination. If no trap +occurs the result depends on the sign of the intermediate result and the +current rounding mode (@w{IEEE 754}, @w{section 7.3}): +@enumerate +@item +Round to nearest carries all overflows to @math{@infinity{}} +with the sign of the intermediate result. +@item +Round toward @math{0} carries all overflows to the largest representable +finite number with the sign of the intermediate result. +@item +Round toward @math{-@infinity{}} carries positive overflows to the +largest representable finite number and negative overflows to +@math{-@infinity{}}. + +@item +Round toward @math{@infinity{}} carries negative overflows to the +most negative representable finite number and positive overflows +to @math{@infinity{}}. +@end enumerate + +Whenever the overflow exception is raised, the inexact exception is also +raised. + +@item Underflow +The underflow exception is raised when an intermediate result is too +small to be calculated accurately, or if the operation's result rounded +to the destination precision is too small to be normalized. + +When no trap is installed for the underflow exception, underflow is +signaled (via the underflow flag) only when both tininess and loss of +accuracy have been detected. If no trap handler is installed the +operation continues with an imprecise small value, or zero if the +destination precision cannot hold the small exact result. + +@item Inexact +This exception is signalled if a rounded result is not exact (such as +when calculating the square root of two) or a result overflows without +an overflow trap. 
+@end table + +@node Infinity and NaN +@subsection Infinity and NaN +@cindex infinity +@cindex not a number +@cindex NaN + +@w{IEEE 754} floating point numbers can represent positive or negative +infinity, and @dfn{NaN} (not a number). These three values arise from +calculations whose result is undefined or cannot be represented +accurately. You can also deliberately set a floating-point variable to +any of them, which is sometimes useful. Some examples of calculations +that produce infinity or NaN: + +@ifnottex +@smallexample +@math{1/0 = @infinity{}} +@math{log (0) = -@infinity{}} +@math{sqrt (-1) = NaN} +@end smallexample +@end ifnottex +@tex +$${1\over0} = \infty$$ +$$\log 0 = -\infty$$ +$$\sqrt{-1} = \hbox{NaN}$$ +@end tex + +When a calculation produces any of these values, an exception also +occurs; see @ref{FP Exceptions}. + +The basic operations and math functions all accept infinity and NaN and +produce sensible output. Infinities propagate through calculations as +one would expect: for example, @math{2 + @infinity{} = @infinity{}}, +@math{4/@infinity{} = 0}, atan @math{(@infinity{}) = @pi{}/2}. NaN, on +the other hand, infects any calculation that involves it. Unless the +calculation would produce the same result no matter what real value +replaced NaN, the result is NaN. + +In comparison operations, positive infinity is larger than all values +except itself and NaN, and negative infinity is smaller than all values +except itself and NaN. NaN is @dfn{unordered}: it is not equal to, +greater than, or less than anything, @emph{including itself}. @code{x == +x} is false if the value of @code{x} is NaN. You can use this to test +whether a value is NaN or not, but the recommended way to test for NaN +is with the @code{isnan} function (@pxref{Floating Point Classes}). In +addition, @code{<}, @code{>}, @code{<=}, and @code{>=} will raise an +exception when applied to NaNs. 
+ +@file{math.h} defines macros that allow you to explicitly set a variable +to infinity or NaN. + +@comment math.h +@comment ISO +@deftypevr Macro float INFINITY +An expression representing positive infinity. It is equal to the value +produced by mathematical operations like @code{1.0 / 0.0}. +@code{-INFINITY} represents negative infinity. + +You can test whether a floating-point value is infinite by comparing it +to this macro. However, this is not recommended; you should use the +@code{isfinite} macro instead. @xref{Floating Point Classes}. + +This macro was introduced in the @w{ISO C99} standard. +@end deftypevr + +@comment math.h +@comment GNU +@deftypevr Macro float NAN +An expression representing a value which is ``not a number''. This +macro is a GNU extension, available only on machines that support the +``not a number'' value---that is to say, on all machines that support +IEEE floating point. + +You can use @samp{#ifdef NAN} to test whether the machine supports +NaN. (Of course, you must arrange for GNU extensions to be visible, +such as by defining @code{_GNU_SOURCE}, and then you must include +@file{math.h}.) +@end deftypevr + +@comment math.h +@comment ISO +@deftypevr Macro float SNANF +@deftypevrx Macro double SNAN +@deftypevrx Macro {long double} SNANL +These macros, defined by TS 18661-1:2014, are constant expressions for +signaling NaNs. +@end deftypevr + +@comment fenv.h +@comment ISO +@deftypevr Macro int FE_SNANS_ALWAYS_SIGNAL +This macro, defined by TS 18661-1:2014, is defined to @code{1} in +@file{fenv.h} to indicate that functions and operations with signaling +NaN inputs and floating-point results always raise the invalid +exception and return a quiet NaN, even in cases (such as @code{fmax}, +@code{hypot} and @code{pow}) where a quiet NaN input can produce a +non-NaN result. Because some compiler optimizations may not handle +signaling NaNs correctly, this macro is only defined if compiler +support for signaling NaNs is enabled. 
That support can be enabled +with the GCC option @option{-fsignaling-nans}. +@end deftypevr + +@w{IEEE 754} also allows for another unusual value: negative zero. This +value is produced when you divide a positive number by negative +infinity, or when a negative result is smaller than the limits of +representation. + +@node Status bit operations +@subsection Examining the FPU status word + +@w{ISO C99} defines functions to query and manipulate the +floating-point status word. You can use these functions to check for +untrapped exceptions when it's convenient, rather than worrying about +them in the middle of a calculation. + +These constants represent the various @w{IEEE 754} exceptions. Not all +FPUs report all the different exceptions. Each constant is defined if +and only if the FPU you are compiling for supports that exception, so +you can test for FPU support with @samp{#ifdef}. They are defined in +@file{fenv.h}. + +@vtable @code +@comment fenv.h +@comment ISO +@item FE_INEXACT + The inexact exception. +@comment fenv.h +@comment ISO +@item FE_DIVBYZERO + The divide by zero exception. +@comment fenv.h +@comment ISO +@item FE_UNDERFLOW + The underflow exception. +@comment fenv.h +@comment ISO +@item FE_OVERFLOW + The overflow exception. +@comment fenv.h +@comment ISO +@item FE_INVALID + The invalid exception. +@end vtable + +The macro @code{FE_ALL_EXCEPT} is the bitwise OR of all exception macros +which are supported by the FP implementation. + +These functions allow you to clear exception flags, test for exceptions, +and save and restore the set of exceptions flagged. + +@comment fenv.h +@comment ISO +@deftypefun int feclearexcept (int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{@assposix{}}@acsafe{@acsposix{}}} +@c The other functions in this section that modify FP status register +@c mostly do so with non-atomic load-modify-store sequences, but since +@c the register is thread-specific, this should be fine, and safe for +@c cancellation. 
As long as the FP environment is restored before the +@c signal handler returns control to the interrupted thread (like any +@c kernel should do), the functions are also safe for use in signal +@c handlers. +This function clears all of the supported exception flags indicated by +@var{excepts}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int feraiseexcept (int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function raises the supported exceptions indicated by +@var{excepts}. If more than one exception bit in @var{excepts} is set +the order in which the exceptions are raised is undefined except that +overflow (@code{FE_OVERFLOW}) or underflow (@code{FE_UNDERFLOW}) are +raised before inexact (@code{FE_INEXACT}). Whether the inexact +exception is also raised for overflow or underflow is implementation +dependent. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int fesetexcept (int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function sets the supported exception flags indicated by +@var{excepts}, like @code{feraiseexcept}, but without causing enabled +traps to be taken. @code{fesetexcept} is from TS 18661-1:2014. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int fetestexcept (int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Test whether the exception flags indicated by the parameter @var{excepts} +are currently set. If any of them are, a nonzero value is returned +which specifies which exceptions are set. Otherwise the result is zero. +@end deftypefun + +To understand these functions, imagine that the status word is an +integer variable named @var{status}.
@code{feclearexcept} is then +equivalent to @samp{status &= ~excepts} and @code{fetestexcept} is +equivalent to @samp{(status & excepts)}. The actual implementation may +be very different, of course. + +Exception flags are only cleared when the program explicitly requests it, +by calling @code{feclearexcept}. If you want to check for exceptions +from a set of calculations, you should clear all the flags first. Here +is a simple example of the way to use @code{fetestexcept}: + +@smallexample +@{ + double f; + int raised; + feclearexcept (FE_ALL_EXCEPT); + f = compute (); + raised = fetestexcept (FE_OVERFLOW | FE_INVALID); + if (raised & FE_OVERFLOW) @{ /* @dots{} */ @} + if (raised & FE_INVALID) @{ /* @dots{} */ @} + /* @dots{} */ +@} +@end smallexample + +You cannot explicitly set bits in the status word. You can, however, +save the entire status word and restore it later. This is done with the +following functions: + +@comment fenv.h +@comment ISO +@deftypefun int fegetexceptflag (fexcept_t *@var{flagp}, int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function stores in the variable pointed to by @var{flagp} an +implementation-defined value representing the current setting of the +exception flags indicated by @var{excepts}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int fesetexceptflag (const fexcept_t *@var{flagp}, int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function restores the flags for the exceptions indicated by +@var{excepts} to the values stored in the variable pointed to by +@var{flagp}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +Note that the value stored in @code{fexcept_t} bears no resemblance to +the bit mask returned by @code{fetestexcept}. The type may not even be +an integer. 
Do not attempt to modify an @code{fexcept_t} variable. + +@comment fenv.h +@comment ISO +@deftypefun int fetestexceptflag (const fexcept_t *@var{flagp}, int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Test whether the exception flags indicated by the parameter +@var{excepts} are set in the variable pointed to by @var{flagp}. If +any of them are, a nonzero value is returned which specifies which +exceptions are set. Otherwise the result is zero. +@code{fetestexceptflag} is from TS 18661-1:2014. +@end deftypefun + +@node Math Error Reporting +@subsection Error Reporting by Mathematical Functions +@cindex errors, mathematical +@cindex domain error +@cindex range error + +Many of the math functions are defined only over a subset of the real or +complex numbers. Even if they are mathematically defined, their result +may be larger or smaller than the range representable by their return +type without loss of accuracy. These are known as @dfn{domain errors}, +@dfn{overflows}, and +@dfn{underflows}, respectively. Math functions do several things when +one of these errors occurs. In this manual we will refer to the +complete response as @dfn{signalling} a domain error, overflow, or +underflow. + +When a math function suffers a domain error, it raises the invalid +exception and returns NaN. It also sets @var{errno} to @code{EDOM}; +this is for compatibility with old systems that do not support @w{IEEE +754} exception handling. Likewise, when overflow occurs, math +functions raise the overflow exception and, in the default rounding +mode, return @math{@infinity{}} or @math{-@infinity{}} as appropriate +(in other rounding modes, the largest finite value of the appropriate +sign is returned when appropriate for that rounding mode). They also +set @var{errno} to @code{ERANGE} if returning @math{@infinity{}} or +@math{-@infinity{}}; @var{errno} may or may not be set to +@code{ERANGE} when a finite value is returned on overflow. 
When +underflow occurs, the underflow exception is raised, and zero +(appropriately signed) or a subnormal value, as appropriate for the +mathematical result of the function and the rounding mode, is +returned. @var{errno} may be set to @code{ERANGE}, but this is not +guaranteed; it is intended that @theglibc{} should set it when the +underflow is to an appropriately signed zero, but not necessarily for +other underflows. + +When a math function has an argument that is a signaling NaN, +@theglibc{} does not consider this a domain error, so @code{errno} is +unchanged, but the invalid exception is still raised (except for a few +functions that are specified to handle signaling NaNs differently). + +Some of the math functions are defined mathematically to result in a +complex value over parts of their domains. The most familiar example of +this is taking the square root of a negative number. The complex math +functions, such as @code{csqrt}, will return the appropriate complex value +in this case. The real-valued functions, such as @code{sqrt}, will +signal a domain error. + +Some older hardware does not support infinities. On that hardware, +overflows instead return a particular very large number (usually the +largest representable number). @file{math.h} defines macros you can use +to test for overflow on both old and new hardware. + +@comment math.h +@comment ISO +@deftypevr Macro double HUGE_VAL +@comment math.h +@comment ISO +@deftypevrx Macro float HUGE_VALF +@comment math.h +@comment ISO +@deftypevrx Macro {long double} HUGE_VALL +An expression representing a particular very large number. On machines +that use @w{IEEE 754} floating point format, @code{HUGE_VAL} is infinity. +On other machines, it's typically the largest positive number that can +be represented. + +Mathematical functions return the appropriately typed version of +@code{HUGE_VAL} or @code{@minus{}HUGE_VAL} when the result is too large +to be represented. 
+@end deftypevr + +@node Rounding +@section Rounding Modes + +Floating-point calculations are carried out internally with extra +precision, and then rounded to fit into the destination type. This +ensures that results are as precise as the input data. @w{IEEE 754} +defines four possible rounding modes: + +@table @asis +@item Round to nearest. +This is the default mode. It should be used unless there is a specific +need for one of the others. In this mode results are rounded to the +nearest representable value. If the result is midway between two +representable values, the even representable is chosen. @dfn{Even} here +means the lowest-order bit is zero. This rounding mode prevents +statistical bias and guarantees numeric stability: round-off errors in a +lengthy calculation will remain smaller than half of @code{FLT_EPSILON}. + +@c @item Round toward @math{+@infinity{}} +@item Round toward plus Infinity. +All results are rounded to the smallest representable value +which is greater than the result. + +@c @item Round toward @math{-@infinity{}} +@item Round toward minus Infinity. +All results are rounded to the largest representable value which is less +than the result. + +@item Round toward zero. +All results are rounded to the largest representable value whose +magnitude is less than that of the result. In other words, if the +result is negative it is rounded up; if it is positive, it is rounded +down. +@end table + +@noindent +@file{fenv.h} defines constants which you can use to refer to the +various rounding modes. Each one will be defined if and only if the FPU +supports the corresponding rounding mode. + +@vtable @code +@comment fenv.h +@comment ISO +@item FE_TONEAREST +Round to nearest. + +@comment fenv.h +@comment ISO +@item FE_UPWARD +Round toward @math{+@infinity{}}. + +@comment fenv.h +@comment ISO +@item FE_DOWNWARD +Round toward @math{-@infinity{}}. + +@comment fenv.h +@comment ISO +@item FE_TOWARDZERO +Round toward zero. 
+@end vtable + +Underflow is an unusual case. Normally, @w{IEEE 754} floating point +numbers are always normalized (@pxref{Floating Point Concepts}). +Numbers smaller than @math{2^r} (where @math{r} is the minimum exponent, +@code{FLT_MIN_EXP-1} for @code{float}) cannot be represented as +normalized numbers. Rounding all such numbers to zero or @math{2^r} +would cause some algorithms to fail at 0. Therefore, they are left in +denormalized form. That produces loss of precision, since some bits of +the mantissa are stolen to indicate the decimal point. + +If a result is too small to be represented as a denormalized number, it +is rounded to zero. However, the sign of the result is preserved; if +the calculation was negative, the result is @dfn{negative zero}. +Negative zero can also result from some operations on infinity, such as +@math{4/-@infinity{}}. + +At any time, one of the above four rounding modes is selected. You can +find out which one with this function: + +@comment fenv.h +@comment ISO +@deftypefun int fegetround (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns the currently selected rounding mode, represented by one of the +values of the defined rounding mode macros. +@end deftypefun + +@noindent +To change the rounding mode, use this function: + +@comment fenv.h +@comment ISO +@deftypefun int fesetround (int @var{round}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Changes the currently selected rounding mode to @var{round}. If +@var{round} does not correspond to one of the supported rounding modes +nothing is changed. @code{fesetround} returns zero if it changed the +rounding mode, or a nonzero value if the mode is not supported. +@end deftypefun + +You should avoid changing the rounding mode if possible. It can be an +expensive operation; also, some hardware requires you to compile your +program differently for it to work. The resulting code may run slower. +See your compiler documentation for details.
+@c This section used to claim that functions existed to round one number +@c in a specific fashion. I can't find any functions in the library +@c that do that. -zw + +@node Control Functions +@section Floating-Point Control Functions + +@w{IEEE 754} floating-point implementations allow the programmer to +decide whether traps will occur for each of the exceptions, by setting +bits in the @dfn{control word}. In C, traps result in the program +receiving the @code{SIGFPE} signal; see @ref{Signal Handling}. + +@strong{NB:} @w{IEEE 754} says that trap handlers are given details of +the exceptional situation, and can set the result value. C signals do +not provide any mechanism to pass this information back and forth. +Trapping exceptions in C is therefore not very useful. + +It is sometimes necessary to save the state of the floating-point unit +while you perform some calculation. The library provides functions +which save and restore the exception flags, the set of exceptions that +generate traps, and the rounding mode. This information is known as the +@dfn{floating-point environment}. + +The functions to save and restore the floating-point environment all use +a variable of type @code{fenv_t} to store information. This type is +defined in @file{fenv.h}. Its size and contents are +implementation-defined. You should not attempt to manipulate a variable +of this type directly. + +To save the state of the FPU, use one of these functions: + +@comment fenv.h +@comment ISO +@deftypefun int fegetenv (fenv_t *@var{envp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Store the floating-point environment in the variable pointed to by +@var{envp}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int feholdexcept (fenv_t *@var{envp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Store the current floating-point environment in the object pointed to by +@var{envp}. 
Then clear all exception flags, and set the FPU to trap no +exceptions. Not all FPUs support trapping no exceptions; if +@code{feholdexcept} cannot set this mode, it returns a nonzero value. If it +succeeds, it returns zero. +@end deftypefun + +The functions which restore the floating-point environment can take these +kinds of arguments: + +@itemize @bullet +@item +Pointers to @code{fenv_t} objects, which were initialized previously by a +call to @code{fegetenv} or @code{feholdexcept}. +@item +@vindex FE_DFL_ENV +The special macro @code{FE_DFL_ENV} which represents the floating-point +environment as it was available at program start. +@item +Implementation defined macros with names starting with @code{FE_} and +having type @code{fenv_t *}. + +@vindex FE_NOMASK_ENV +If possible, @theglibc{} defines a macro @code{FE_NOMASK_ENV} +which represents an environment where every exception raised causes a +trap to occur. You can test for this macro using @code{#ifdef}. It is +only defined if @code{_GNU_SOURCE} is defined. + +Some platforms might define other predefined environments. +@end itemize + +@noindent +To set the floating-point environment, you can use either of these +functions: + +@comment fenv.h +@comment ISO +@deftypefun int fesetenv (const fenv_t *@var{envp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the floating-point environment to that described by @var{envp}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int feupdateenv (const fenv_t *@var{envp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Like @code{fesetenv}, this function sets the floating-point environment +to that described by @var{envp}. However, if any exceptions were +flagged in the status word before @code{feupdateenv} was called, they +remain flagged after the call.
In other words, after @code{feupdateenv} +is called, the status word is the bitwise OR of the previous status word +and the one saved in @var{envp}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@noindent +TS 18661-1:2014 defines additional functions to save and restore +floating-point control modes (such as the rounding mode and whether +traps are enabled) while leaving other status (such as raised flags) +unchanged. + +@vindex FE_DFL_MODE +The special macro @code{FE_DFL_MODE} may be passed to +@code{fesetmode}. It represents the floating-point control modes at +program start. + +@comment fenv.h +@comment ISO +@deftypefun int fegetmode (femode_t *@var{modep}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Store the floating-point control modes in the variable pointed to by +@var{modep}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@comment fenv.h +@comment ISO +@deftypefun int fesetmode (const femode_t *@var{modep}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the floating-point control modes to those described by +@var{modep}. + +The function returns zero in case the operation was successful, a +non-zero value otherwise. +@end deftypefun + +@noindent +To control, for individual exceptions, whether raising them causes a +trap to occur, you can use the following functions. + +@strong{Portability Note:} These functions are all GNU extensions. + +@comment fenv.h +@comment GNU +@deftypefun int feenableexcept (int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function enables traps for each of the exceptions as indicated by +the parameter @var{excepts}. The individual exceptions are described in +@ref{Status bit operations}. Only the specified exceptions are +enabled; the status of the other exceptions is not changed.
+ +The function returns the previous enabled exceptions in case the +operation was successful, @code{-1} otherwise. +@end deftypefun + +@comment fenv.h +@comment GNU +@deftypefun int fedisableexcept (int @var{excepts}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function disables traps for each of the exceptions as indicated by +the parameter @var{excepts}. The individual exceptions are described in +@ref{Status bit operations}. Only the specified exceptions are +disabled, the status of the other exceptions is not changed. + +The function returns the previous enabled exceptions in case the +operation was successful, @code{-1} otherwise. +@end deftypefun + +@comment fenv.h +@comment GNU +@deftypefun int fegetexcept (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function returns a bitmask of all currently enabled exceptions. It +returns @code{-1} in case of failure. +@end deftypefun + +@node Arithmetic Functions +@section Arithmetic Functions + +The C library provides functions to do basic operations on +floating-point numbers. These include absolute value, maximum and minimum, +normalization, bit twiddling, rounding, and a few others. + +@menu +* Absolute Value:: Absolute values of integers and floats. +* Normalization Functions:: Extracting exponents and putting them back. +* Rounding Functions:: Rounding floats to integers. +* Remainder Functions:: Remainders on division, precisely defined. +* FP Bit Twiddling:: Sign bit adjustment. Adding epsilon. +* FP Comparison Functions:: Comparisons without risk of exceptions. +* Misc FP Arithmetic:: Max, min, positive difference, multiply-add. +@end menu + +@node Absolute Value +@subsection Absolute Value +@cindex absolute value functions + +These functions are provided for obtaining the @dfn{absolute value} (or +@dfn{magnitude}) of a number. The absolute value of a real number +@var{x} is @var{x} if @var{x} is positive, @minus{}@var{x} if @var{x} is +negative. 
For a complex number @var{z}, whose real part is @var{x} and +whose imaginary part is @var{y}, the absolute value is @w{@code{sqrt +(@var{x}*@var{x} + @var{y}*@var{y})}}. + +@pindex math.h +@pindex stdlib.h +Prototypes for @code{abs}, @code{labs} and @code{llabs} are in @file{stdlib.h}; +@code{imaxabs} is declared in @file{inttypes.h}; +@code{fabs}, @code{fabsf} and @code{fabsl} are declared in @file{math.h}. +@code{cabs}, @code{cabsf} and @code{cabsl} are declared in @file{complex.h}. + +@comment stdlib.h +@comment ISO +@deftypefun int abs (int @var{number}) +@comment stdlib.h +@comment ISO +@deftypefunx {long int} labs (long int @var{number}) +@comment stdlib.h +@comment ISO +@deftypefunx {long long int} llabs (long long int @var{number}) +@comment inttypes.h +@comment ISO +@deftypefunx intmax_t imaxabs (intmax_t @var{number}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the absolute value of @var{number}. + +Most computers use a two's complement integer representation, in which +the absolute value of @code{INT_MIN} (the smallest possible @code{int}) +cannot be represented; thus, @w{@code{abs (INT_MIN)}} is not defined. + +@code{llabs} and @code{imaxabs} are new to @w{ISO C99}. + +See @ref{Integers} for a description of the @code{intmax_t} type. + +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double fabs (double @var{number}) +@comment math.h +@comment ISO +@deftypefunx float fabsf (float @var{number}) +@comment math.h +@comment ISO +@deftypefunx {long double} fabsl (long double @var{number}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns the absolute value of the floating-point number +@var{number}.
+@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun double cabs (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx float cabsf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {long double} cabsl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the absolute value of the complex number @var{z} +(@pxref{Complex Numbers}). The absolute value of a complex number is: + +@smallexample +sqrt (creal (@var{z}) * creal (@var{z}) + cimag (@var{z}) * cimag (@var{z})) +@end smallexample + +This function should always be used instead of the direct formula +because it takes special care to avoid losing precision. It may also +take advantage of hardware support for this operation. See @code{hypot} +in @ref{Exponents and Logarithms}. +@end deftypefun + +@node Normalization Functions +@subsection Normalization Functions +@cindex normalization functions (floating-point) + +The functions described in this section are primarily provided as a way +to efficiently perform certain low-level manipulations on floating point +numbers that are represented internally using a binary radix; +see @ref{Floating Point Concepts}. These functions are required to +have equivalent behavior even if the representation does not use a radix +of 2, but of course they are unlikely to be particularly efficient in +those cases. + +@pindex math.h +All these functions are declared in @file{math.h}. + +@comment math.h +@comment ISO +@deftypefun double frexp (double @var{value}, int *@var{exponent}) +@comment math.h +@comment ISO +@deftypefunx float frexpf (float @var{value}, int *@var{exponent}) +@comment math.h +@comment ISO +@deftypefunx {long double} frexpl (long double @var{value}, int *@var{exponent}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are used to split the number @var{value} +into a normalized fraction and an exponent. 
+ +If the argument @var{value} is not zero, the return value is @var{value} +times a power of two, and its magnitude is always in the range 1/2 +(inclusive) to 1 (exclusive). The corresponding exponent is stored in +@code{*@var{exponent}}; the return value multiplied by 2 raised to this +exponent equals the original number @var{value}. + +For example, @code{frexp (12.8, &exponent)} returns @code{0.8} and +stores @code{4} in @code{exponent}. + +If @var{value} is zero, then the return value is zero and +zero is stored in @code{*@var{exponent}}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double ldexp (double @var{value}, int @var{exponent}) +@comment math.h +@comment ISO +@deftypefunx float ldexpf (float @var{value}, int @var{exponent}) +@comment math.h +@comment ISO +@deftypefunx {long double} ldexpl (long double @var{value}, int @var{exponent}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the result of multiplying the floating-point +number @var{value} by 2 raised to the power @var{exponent}. (It can +be used to reassemble floating-point numbers that were taken apart +by @code{frexp}.) + +For example, @code{ldexp (0.8, 4)} returns @code{12.8}. +@end deftypefun + +The following functions, which come from BSD, provide facilities +equivalent to those of @code{ldexp} and @code{frexp}. See also the +@w{ISO C} function @code{logb} which originally also appeared in BSD. + +@comment math.h +@comment BSD +@deftypefun double scalb (double @var{value}, double @var{exponent}) +@comment math.h +@comment BSD +@deftypefunx float scalbf (float @var{value}, float @var{exponent}) +@comment math.h +@comment BSD +@deftypefunx {long double} scalbl (long double @var{value}, long double @var{exponent}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{scalb} function is the BSD name for @code{ldexp}. 
+@end deftypefun + +@comment math.h +@comment BSD +@deftypefun double scalbn (double @var{x}, int @var{n}) +@comment math.h +@comment BSD +@deftypefunx float scalbnf (float @var{x}, int @var{n}) +@comment math.h +@comment BSD +@deftypefunx {long double} scalbnl (long double @var{x}, int @var{n}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{scalbn} is identical to @code{scalb}, except that the exponent +@var{n} is an @code{int} instead of a floating-point number. +@end deftypefun + +@comment math.h +@comment BSD +@deftypefun double scalbln (double @var{x}, long int @var{n}) +@comment math.h +@comment BSD +@deftypefunx float scalblnf (float @var{x}, long int @var{n}) +@comment math.h +@comment BSD +@deftypefunx {long double} scalblnl (long double @var{x}, long int @var{n}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{scalbln} is identical to @code{scalb}, except that the exponent +@var{n} is a @code{long int} instead of a floating-point number. +@end deftypefun + +@comment math.h +@comment BSD +@deftypefun double significand (double @var{x}) +@comment math.h +@comment BSD +@deftypefunx float significandf (float @var{x}) +@comment math.h +@comment BSD +@deftypefunx {long double} significandl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{significand} returns the mantissa of @var{x} scaled to the range +@math{[1, 2)}. +It is equivalent to @w{@code{scalb (@var{x}, (double) -ilogb (@var{x}))}}. + +This function exists mainly for use in certain standardized tests +of @w{IEEE 754} conformance. +@end deftypefun + +@node Rounding Functions +@subsection Rounding Functions +@cindex converting floats to integers + +@pindex math.h +The functions listed here perform operations such as rounding and +truncation of floating-point values. Some of these functions convert +floating point numbers to integer values. They are all declared in +@file{math.h}. 
+ +You can also convert floating-point numbers to integers simply by +casting them to @code{int}. This discards the fractional part, +effectively rounding towards zero. However, this only works if the +result can actually be represented as an @code{int}---for very large +numbers, this is impossible. The functions listed here return the +result as a @code{double} instead to get around this problem. + +The @code{fromfp} functions use the following macros, from TS +18661-1:2014, to specify the direction of rounding. These correspond +to the rounding directions defined in IEEE 754-2008. + +@vtable @code +@comment math.h +@comment ISO +@item FP_INT_UPWARD +Round toward @math{+@infinity{}}. + +@comment math.h +@comment ISO +@item FP_INT_DOWNWARD +Round toward @math{-@infinity{}}. + +@comment math.h +@comment ISO +@item FP_INT_TOWARDZERO +Round toward zero. + +@comment math.h +@comment ISO +@item FP_INT_TONEARESTFROMZERO +Round to nearest, ties round away from zero. + +@comment math.h +@comment ISO +@item FP_INT_TONEAREST +Round to nearest, ties round to even. +@end vtable + +@comment math.h +@comment ISO +@deftypefun double ceil (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float ceilf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} ceill (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions round @var{x} upwards to the nearest integer, +returning that value as a @code{double}. Thus, @code{ceil (1.5)} +is @code{2.0}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double floor (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float floorf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} floorl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions round @var{x} downwards to the nearest +integer, returning that value as a @code{double}. Thus, @code{floor +(1.5)} is @code{1.0} and @code{floor (-1.5)} is @code{-2.0}. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double trunc (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float truncf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} truncl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{trunc} functions round @var{x} towards zero to the nearest +integer (returned in floating-point format). Thus, @code{trunc (1.5)} +is @code{1.0} and @code{trunc (-1.5)} is @code{-1.0}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double rint (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float rintf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} rintl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions round @var{x} to an integer value according to the +current rounding mode. @xref{Floating Point Parameters}, for +information about the various rounding modes. The default +rounding mode is to round to the nearest integer; some machines +support other modes, but round-to-nearest is always used unless +you explicitly select another. + +If @var{x} was not initially an integer, these functions raise the +inexact exception. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double nearbyint (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float nearbyintf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} nearbyintl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the same value as the @code{rint} functions, but +do not raise the inexact exception if @var{x} is not an integer. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double round (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float roundf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} roundl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are similar to @code{rint}, but they round halfway +cases away from zero instead of to the nearest integer (or other +current rounding mode). +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double roundeven (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float roundevenf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} roundevenl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions, from TS 18661-1:2014, are similar to @code{round}, +but they round halfway cases to even instead of away from zero. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun {long int} lrint (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} lrintf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} lrintl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are just like @code{rint}, but they return a +@code{long int} instead of a floating-point number. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun {long long int} llrint (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long long int} llrintf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long long int} llrintl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are just like @code{rint}, but they return a +@code{long long int} instead of a floating-point number. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun {long int} lround (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} lroundf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} lroundl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are just like @code{round}, but they return a +@code{long int} instead of a floating-point number. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun {long long int} llround (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long long int} llroundf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long long int} llroundl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are just like @code{round}, but they return a +@code{long long int} instead of a floating-point number. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun intmax_t fromfp (double @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx intmax_t fromfpf (float @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx intmax_t fromfpl (long double @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx uintmax_t ufromfp (double @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx uintmax_t ufromfpf (float @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx uintmax_t ufromfpl (long double @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx intmax_t fromfpx (double @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx intmax_t fromfpxf (float @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx intmax_t fromfpxl (long double @var{x}, int @var{round}, unsigned int 
@var{width}) +@comment math.h +@comment ISO +@deftypefunx uintmax_t ufromfpx (double @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx uintmax_t ufromfpxf (float @var{x}, int @var{round}, unsigned int @var{width}) +@comment math.h +@comment ISO +@deftypefunx uintmax_t ufromfpxl (long double @var{x}, int @var{round}, unsigned int @var{width}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions, from TS 18661-1:2014, convert a floating-point number +to an integer according to the rounding direction @var{round} (one of +the @code{FP_INT_*} macros). If the integer is outside the range of a +signed or unsigned (depending on the return type of the function) type +of width @var{width} bits (or outside the range of the return type, if +@var{width} is larger), or if @var{x} is infinite or NaN, or if +@var{width} is zero, a domain error occurs and an unspecified value is +returned. The functions with an @samp{x} in their names raise the +inexact exception when a domain error does not occur and the argument +is not an integer; the other functions do not raise the inexact +exception. +@end deftypefun + + +@comment math.h +@comment ISO +@deftypefun double modf (double @var{value}, double *@var{integer-part}) +@comment math.h +@comment ISO +@deftypefunx float modff (float @var{value}, float *@var{integer-part}) +@comment math.h +@comment ISO +@deftypefunx {long double} modfl (long double @var{value}, long double *@var{integer-part}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions break the argument @var{value} into an integer part and a +fractional part (between @code{-1} and @code{1}, exclusive). Their sum +equals @var{value}. Each of the parts has the same sign as @var{value}, +and the integer part is always rounded toward zero. + +@code{modf} stores the integer part in @code{*@var{integer-part}}, and +returns the fractional part. 
For example, @code{modf (2.5, &intpart)} +returns @code{0.5} and stores @code{2.0} into @code{intpart}. +@end deftypefun + +@node Remainder Functions +@subsection Remainder Functions + +The functions in this section compute the remainder on division of two +floating-point numbers. Each is a little different; pick the one that +suits your problem. + +@comment math.h +@comment ISO +@deftypefun double fmod (double @var{numerator}, double @var{denominator}) +@comment math.h +@comment ISO +@deftypefunx float fmodf (float @var{numerator}, float @var{denominator}) +@comment math.h +@comment ISO +@deftypefunx {long double} fmodl (long double @var{numerator}, long double @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the remainder from the division of +@var{numerator} by @var{denominator}. Specifically, the return value is +@code{@var{numerator} - @w{@var{n} * @var{denominator}}}, where @var{n} +is the quotient of @var{numerator} divided by @var{denominator}, rounded +towards zero to an integer. Thus, @w{@code{fmod (6.5, 2.3)}} returns +@code{1.9}, which is @code{6.5} minus @code{4.6}. + +The result has the same sign as the @var{numerator} and has magnitude +less than the magnitude of the @var{denominator}. + +If @var{denominator} is zero, @code{fmod} signals a domain error. +@end deftypefun + +@comment math.h +@comment BSD +@deftypefun double drem (double @var{numerator}, double @var{denominator}) +@comment math.h +@comment BSD +@deftypefunx float dremf (float @var{numerator}, float @var{denominator}) +@comment math.h +@comment BSD +@deftypefunx {long double} dreml (long double @var{numerator}, long double @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are like @code{fmod} except that they round the +internal quotient @var{n} to the nearest integer instead of towards zero +to an integer. For example, @code{drem (6.5, 2.3)} returns @code{-0.4}, +which is @code{6.5} minus @code{6.9}. 
+ +The absolute value of the result is less than or equal to half the +absolute value of the @var{denominator}. The difference between +@code{fmod (@var{numerator}, @var{denominator})} and @code{drem +(@var{numerator}, @var{denominator})} is always either +@var{denominator}, minus @var{denominator}, or zero. + +If @var{denominator} is zero, @code{drem} signals a domain error. +@end deftypefun + +@comment math.h +@comment BSD +@deftypefun double remainder (double @var{numerator}, double @var{denominator}) +@comment math.h +@comment BSD +@deftypefunx float remainderf (float @var{numerator}, float @var{denominator}) +@comment math.h +@comment BSD +@deftypefunx {long double} remainderl (long double @var{numerator}, long double @var{denominator}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is another name for @code{drem}. +@end deftypefun + +@node FP Bit Twiddling +@subsection Setting and modifying single bits of FP values +@cindex FP arithmetic + +There are some operations that are too complicated or expensive to +perform by hand on floating-point numbers. @w{ISO C99} defines +functions to do these operations, which mostly involve changing single +bits. + +@comment math.h +@comment ISO +@deftypefun double copysign (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float copysignf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} copysignl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return @var{x} but with the sign of @var{y}. They work +even if @var{x} or @var{y} are NaN or zero. Both of these can carry a +sign (although not all implementations support it) and this is one of +the few operations that can tell the difference. + +@code{copysign} never raises an exception. +@c except signalling NaNs + +This function is defined in @w{IEC 559} (and the appendix with +recommended functions in @w{IEEE 754}/@w{IEEE 854}). 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun int signbit (@emph{float-type} @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{signbit} is a generic macro which can work on all floating-point +types. It returns a nonzero value if the value of @var{x} has its sign +bit set. + +This is not the same as @code{x < 0.0}, because @w{IEEE 754} floating +point allows zero to be signed. The comparison @code{-0.0 < 0.0} is +false, but @code{signbit (-0.0)} will return a nonzero value. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double nextafter (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float nextafterf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} nextafterl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{nextafter} function returns the next representable neighbor of +@var{x} in the direction towards @var{y}. The size of the step between +@var{x} and the result depends on the type of the result. If +@math{@var{x} = @var{y}} the function simply returns @var{y}. If either +value is @code{NaN}, @code{NaN} is returned. Otherwise +a value corresponding to the value of the least significant bit in the +mantissa is added or subtracted, depending on the direction. +@code{nextafter} will signal overflow or underflow if the result goes +outside of the range of normalized numbers. + +This function is defined in @w{IEC 559} (and the appendix with +recommended functions in @w{IEEE 754}/@w{IEEE 854}). 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double nexttoward (double @var{x}, long double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float nexttowardf (float @var{x}, long double @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} nexttowardl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are identical to the corresponding versions of +@code{nextafter} except that their second argument is a @code{long +double}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double nextup (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float nextupf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} nextupl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{nextup} function returns the next representable neighbor of @var{x} +in the direction of positive infinity. If @var{x} is the smallest negative +subnormal number in the type of @var{x} the function returns @code{-0}. If +@math{@var{x} = @code{0}} the function returns the smallest positive subnormal +number in the type of @var{x}. If @var{x} is NaN, NaN is returned. +If @var{x} is @math{+@infinity{}}, @math{+@infinity{}} is returned. +@code{nextup} is from TS 18661-1:2014. +@code{nextup} never raises an exception except for signaling NaNs. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double nextdown (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float nextdownf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} nextdownl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{nextdown} function returns the next representable neighbor of @var{x} +in the direction of negative infinity. If @var{x} is the smallest positive +subnormal number in the type of @var{x} the function returns @code{+0}. 
If +@math{@var{x} = @code{0}} the function returns the smallest negative subnormal +number in the type of @var{x}. If @var{x} is NaN, NaN is returned. +If @var{x} is @math{-@infinity{}}, @math{-@infinity{}} is returned. +@code{nextdown} is from TS 18661-1:2014. +@code{nextdown} never raises an exception except for signaling NaNs. +@end deftypefun + +@cindex NaN +@comment math.h +@comment ISO +@deftypefun double nan (const char *@var{tagp}) +@comment math.h +@comment ISO +@deftypefunx float nanf (const char *@var{tagp}) +@comment math.h +@comment ISO +@deftypefunx {long double} nanl (const char *@var{tagp}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c The unsafe-but-ruled-safe locale use comes from strtod. +The @code{nan} function returns a representation of NaN, provided that +NaN is supported by the target platform. +@code{nan ("@var{n-char-sequence}")} is equivalent to +@code{strtod ("NAN(@var{n-char-sequence})")}. + +The argument @var{tagp} is used in an unspecified manner. On @w{IEEE +754} systems, there are many representations of NaN, and @var{tagp} +selects one. On other systems it may do nothing. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun int canonicalize (double *@var{cx}, const double *@var{x}) +@comment math.h +@comment ISO +@deftypefunx int canonicalizef (float *@var{cx}, const float *@var{x}) +@comment math.h +@comment ISO +@deftypefunx int canonicalizel (long double *@var{cx}, const long double *@var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +In some floating-point formats, some values have canonical (preferred) +and noncanonical encodings (for IEEE interchange binary formats, all +encodings are canonical). These functions, defined by TS +18661-1:2014, attempt to produce a canonical version of the +floating-point value pointed to by @var{x}; if that value is a +signaling NaN, they raise the invalid exception and produce a quiet +NaN. 
If a canonical value is produced, it is stored in the object +pointed to by @var{cx}, and these functions return zero. Otherwise +(if a canonical value could not be produced because the object pointed +to by @var{x} is not a valid representation of any floating-point +value), the object pointed to by @var{cx} is unchanged and a nonzero +value is returned. + +Note that some formats have multiple encodings of a value which are +all equally canonical; when such an encoding is used as an input to +this function, any such encoding of the same value (or of the +corresponding quiet NaN, if that value is a signaling NaN) may be +produced as output. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double getpayload (const double *@var{x}) +@comment math.h +@comment ISO +@deftypefunx float getpayloadf (const float *@var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} getpayloadl (const long double *@var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +IEEE 754 defines the @dfn{payload} of a NaN to be an integer value +encoded in the representation of the NaN. Payloads are typically +propagated from NaN inputs to the result of a floating-point +operation. These functions, defined by TS 18661-1:2014, return the +payload of the NaN pointed to by @var{x} (returned as a positive +integer, or positive zero, represented as a floating-point number); if +@var{x} is not a NaN, they return an unspecified value. They raise no +floating-point exceptions even for signaling NaNs. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun int setpayload (double *@var{x}, double @var{payload}) +@comment math.h +@comment ISO +@deftypefunx int setpayloadf (float *@var{x}, float @var{payload}) +@comment math.h +@comment ISO +@deftypefunx int setpayloadl (long double *@var{x}, long double @var{payload}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions, defined by TS 18661-1:2014, set the object pointed to +by @var{x} to a quiet NaN with payload @var{payload} and a zero sign +bit and return zero. If @var{payload} is not a positive-signed +integer that is a valid payload for a quiet NaN of the given type, the +object pointed to by @var{x} is set to positive zero and a nonzero +value is returned. They raise no floating-point exceptions. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun int setpayloadsig (double *@var{x}, double @var{payload}) +@comment math.h +@comment ISO +@deftypefunx int setpayloadsigf (float *@var{x}, float @var{payload}) +@comment math.h +@comment ISO +@deftypefunx int setpayloadsigl (long double *@var{x}, long double @var{payload}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions, defined by TS 18661-1:2014, set the object pointed to +by @var{x} to a signaling NaN with payload @var{payload} and a zero +sign bit and return zero. If @var{payload} is not a positive-signed +integer that is a valid payload for a signaling NaN of the given type, +the object pointed to by @var{x} is set to positive zero and a nonzero +value is returned. They raise no floating-point exceptions. +@end deftypefun + +@node FP Comparison Functions +@subsection Floating-Point Comparison Functions +@cindex unordered comparison + +The standard C comparison operators provoke exceptions when one or other +of the operands is NaN. For example, + +@smallexample +int v = a < 1.0; +@end smallexample + +@noindent +will raise an exception if @var{a} is NaN. 
(This does @emph{not} +happen with @code{==} and @code{!=}; those merely return false and true, +respectively, when NaN is examined.) Frequently this exception is +undesirable. @w{ISO C99} therefore defines comparison functions that +do not raise exceptions when NaN is examined. All of the functions are +implemented as macros which allow their arguments to be of any +floating-point type. The macros are guaranteed to evaluate their +arguments only once. TS 18661-1:2014 adds such a macro for an +equality comparison that @emph{does} raise an exception for a NaN +argument; it also adds functions that provide a total ordering on all +floating-point values, including NaNs, without raising any exceptions +even for signaling NaNs. + +@comment math.h +@comment ISO +@deftypefn Macro int isgreater (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether the argument @var{x} is greater than +@var{y}. It is equivalent to @code{(@var{x}) > (@var{y})}, but no +exception is raised if @var{x} or @var{y} are NaN. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn Macro int isgreaterequal (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether the argument @var{x} is greater than or +equal to @var{y}. It is equivalent to @code{(@var{x}) >= (@var{y})}, but no +exception is raised if @var{x} or @var{y} are NaN. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn Macro int isless (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether the argument @var{x} is less than @var{y}. +It is equivalent to @code{(@var{x}) < (@var{y})}, but no exception is +raised if @var{x} or @var{y} are NaN. 
+@end deftypefn + +@comment math.h +@comment ISO +@deftypefn Macro int islessequal (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether the argument @var{x} is less than or equal +to @var{y}. It is equivalent to @code{(@var{x}) <= (@var{y})}, but no +exception is raised if @var{x} or @var{y} are NaN. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn Macro int islessgreater (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether the argument @var{x} is less or greater +than @var{y}. It is equivalent to @code{(@var{x}) < (@var{y}) || +(@var{x}) > (@var{y})} (although it only evaluates @var{x} and @var{y} +once), but no exception is raised if @var{x} or @var{y} are NaN. + +This macro is not equivalent to @code{@var{x} != @var{y}}, because that +expression is true if @var{x} or @var{y} are NaN. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn Macro int isunordered (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether its arguments are unordered. In other +words, it is true if @var{x} or @var{y} are NaN, and false otherwise. +@end deftypefn + +@comment math.h +@comment ISO +@deftypefn Macro int iseqsig (@emph{real-floating} @var{x}, @emph{real-floating} @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro determines whether its arguments are equal. It is +equivalent to @code{(@var{x}) == (@var{y})}, but it raises the invalid +exception and sets @code{errno} to @code{EDOM} if either argument is a +NaN. 
+@end deftypefn + +@comment math.h +@comment ISO +@deftypefun int totalorder (double @var{x}, double @var{y}) +@comment ISO +@deftypefunx int totalorderf (float @var{x}, float @var{y}) +@comment ISO +@deftypefunx int totalorderl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions determine whether the total order relationship, +defined in IEEE 754-2008, is true for @var{x} and @var{y}, returning +nonzero if it is true and zero if it is false. No exceptions are +raised even for signaling NaNs. The relationship is true if they are +the same floating-point value (including sign for zero and NaNs, and +payload for NaNs), or if @var{x} comes before @var{y} in the following +order: negative quiet NaNs, in order of decreasing payload; negative +signaling NaNs, in order of decreasing payload; negative infinity; +finite numbers, in ascending order, with negative zero before positive +zero; positive infinity; positive signaling NaNs, in order of +increasing payload; positive quiet NaNs, in order of increasing +payload. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun int totalordermag (double @var{x}, double @var{y}) +@comment ISO +@deftypefunx int totalordermagf (float @var{x}, float @var{y}) +@comment ISO +@deftypefunx int totalordermagl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions determine whether the total order relationship, +defined in IEEE 754-2008, is true for the absolute values of @var{x} +and @var{y}, returning nonzero if it is true and zero if it is false. +No exceptions are raised even for signaling NaNs. +@end deftypefun + +Not all machines provide hardware support for these operations. On +machines that don't, the macros can be very slow. Therefore, you should +not use these functions when NaN is not a concern. + +@strong{NB:} There are no macros @code{isequal} or @code{isunequal}. 
+They are unnecessary, because the @code{==} and @code{!=} operators do +@emph{not} throw an exception if one or both of the operands are NaN. + +@node Misc FP Arithmetic +@subsection Miscellaneous FP arithmetic functions +@cindex minimum +@cindex maximum +@cindex positive difference +@cindex multiply-add + +The functions in this section perform miscellaneous but common +operations that are awkward to express with C operators. On some +processors these functions can use special machine instructions to +perform these operations faster than the equivalent C code. + +@comment math.h +@comment ISO +@deftypefun double fmin (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float fminf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} fminl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fmin} function returns the lesser of the two values @var{x} +and @var{y}. It is similar to the expression +@smallexample +((x) < (y) ? (x) : (y)) +@end smallexample +except that @var{x} and @var{y} are only evaluated once. + +If an argument is NaN, the other argument is returned. If both arguments +are NaN, NaN is returned. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double fmax (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float fmaxf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} fmaxl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fmax} function returns the greater of the two values @var{x} +and @var{y}. + +If an argument is NaN, the other argument is returned. If both arguments +are NaN, NaN is returned. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double fminmag (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float fminmagf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} fminmagl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions, from TS 18661-1:2014, return whichever of the two +values @var{x} and @var{y} has the smaller absolute value. If both +have the same absolute value, or either is NaN, they behave the same +as the @code{fmin} functions. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double fmaxmag (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float fmaxmagf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} fmaxmagl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions, from TS 18661-1:2014, return whichever of the two +values @var{x} and @var{y} has the greater absolute value. If both +have the same absolute value, or either is NaN, they behave the same +as the @code{fmax} functions. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double fdim (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float fdimf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} fdiml (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fdim} function returns the positive difference between +@var{x} and @var{y}. The positive difference is @math{@var{x} - +@var{y}} if @var{x} is greater than @var{y}, and @math{0} otherwise. + +If @var{x}, @var{y}, or both are NaN, NaN is returned. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double fma (double @var{x}, double @var{y}, double @var{z}) +@comment math.h +@comment ISO +@deftypefunx float fmaf (float @var{x}, float @var{y}, float @var{z}) +@comment math.h +@comment ISO +@deftypefunx {long double} fmal (long double @var{x}, long double @var{y}, long double @var{z}) +@cindex butterfly +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fma} function performs floating-point multiply-add. This is +the operation @math{(@var{x} @mul{} @var{y}) + @var{z}}, but the +intermediate result is not rounded to the destination type. This can +sometimes improve the precision of a calculation. + +This function was introduced because some processors have a special +instruction to perform multiply-add. The C compiler cannot use it +directly, because the expression @samp{x*y + z} is defined to round the +intermediate result. @code{fma} lets you choose when you want to round +only once. + +@vindex FP_FAST_FMA +On processors which do not implement multiply-add in hardware, +@code{fma} can be very slow since it must avoid intermediate rounding. +@file{math.h} defines the symbols @code{FP_FAST_FMA}, +@code{FP_FAST_FMAF}, and @code{FP_FAST_FMAL} when the corresponding +version of @code{fma} is no slower than the expression @samp{x*y + z}. +In @theglibc{}, this always means the operation is implemented in +hardware. +@end deftypefun + +@node Complex Numbers +@section Complex Numbers +@pindex complex.h +@cindex complex numbers + +@w{ISO C99} introduces support for complex numbers in C. This is done +with a new type qualifier, @code{complex}. It is a keyword if and only +if @file{complex.h} has been included. There are three complex types, +corresponding to the three real types: @code{float complex}, +@code{double complex}, and @code{long double complex}. + +To construct complex numbers you need a way to indicate the imaginary +part of a number. 
There is no standard notation for an imaginary +floating point constant. Instead, @file{complex.h} defines two macros +that can be used to create complex numbers. + +@deftypevr Macro {const float complex} _Complex_I +This macro is a representation of the complex number ``@math{0+1i}''. +Multiplying a real floating-point value by @code{_Complex_I} gives a +complex number whose value is purely imaginary. You can use this to +construct complex constants: + +@smallexample +@math{3.0 + 4.0i} = @code{3.0 + 4.0 * _Complex_I} +@end smallexample + +Note that @code{_Complex_I * _Complex_I} has the value @code{-1}, but +the type of that value is @code{complex}. +@end deftypevr + +@c Put this back in when gcc supports _Imaginary_I. It's too confusing. +@ignore +@noindent +Without an optimizing compiler this is more expensive than the use of +@code{_Imaginary_I} but it is better than nothing. You can avoid all +the hassles if you use the @code{I} macro below if the name is not +a problem. + +@deftypevr Macro {const float imaginary} _Imaginary_I +This macro is a representation of the value ``@math{1i}''. I.e., it is +the value for which + +@smallexample +_Imaginary_I * _Imaginary_I = -1 +@end smallexample + +@noindent +The result is not of type @code{float imaginary} but instead @code{float}. +One can use it to easily construct complex numbers, as in + +@smallexample +3.0 - _Imaginary_I * 4.0 +@end smallexample + +@noindent +which results in the complex number with a real part of 3.0 and an +imaginary part of -4.0. +@end deftypevr +@end ignore + +@noindent +@code{_Complex_I} is a bit of a mouthful. @file{complex.h} also defines +a shorter name for the same constant. + +@deftypevr Macro {const float complex} I +This macro has exactly the same value as @code{_Complex_I}. Most of the +time it is preferable. However, it causes problems if you want to use +the identifier @code{I} for something else.
You can safely write + +@smallexample +#include <complex.h> +#undef I +@end smallexample + +@noindent +if you need @code{I} for your own purposes. (In that case we recommend +you also define some other short name for @code{_Complex_I}, such as +@code{J}.) + +@ignore +If the implementation does not support the @code{imaginary} types +@code{I} is defined as @code{_Complex_I} which is the second best +solution. It still can be used in the same way but requires a most +clever compiler to get the same results. +@end ignore +@end deftypevr + +@node Operations on Complex +@section Projections, Conjugates, and Decomposing of Complex Numbers +@cindex project complex numbers +@cindex conjugate complex numbers +@cindex decompose complex numbers +@pindex complex.h + +@w{ISO C99} also defines functions that perform basic operations on +complex numbers, such as decomposition and conjugation. The prototypes +for all these functions are in @file{complex.h}. All functions are +available in three variants, one for each of the three complex types. + +@comment complex.h +@comment ISO +@deftypefun double creal (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx float crealf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {long double} creall (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the real part of the complex number @var{z}. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun double cimag (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx float cimagf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {long double} cimagl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the imaginary part of the complex number @var{z}. 
+@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} conj (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} conjf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} conjl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the conjugate value of the complex number +@var{z}. The conjugate of a complex number has the same real part and a +negated imaginary part. In other words, @samp{conj(a + bi) = a + -bi}. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun double carg (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx float cargf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {long double} cargl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the argument of the complex number @var{z}. +The argument of a complex number is the angle in the complex plane +between the positive real axis and a line passing through zero and the +number. This angle is measured in the usual fashion and ranges from +@math{-@pi{}} to @math{@pi{}}. + +@code{carg} has a branch cut along the negative real axis. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} cproj (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} cprojf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} cprojl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the projection of the complex value @var{z} onto +the Riemann sphere. Values with an infinite imaginary part are projected +to positive infinity on the real axis, even if the real part is NaN. 
If +the real part is infinite, the result is equivalent to + +@smallexample +INFINITY + I * copysign (0.0, cimag (z)) +@end smallexample +@end deftypefun + +@node Parsing of Numbers +@section Parsing of Numbers +@cindex parsing numbers (in formatted input) +@cindex converting strings to numbers +@cindex number syntax, parsing +@cindex syntax, for reading numbers + +This section describes functions for ``reading'' integer and +floating-point numbers from a string. It may be more convenient in some +cases to use @code{sscanf} or one of the related functions; see +@ref{Formatted Input}. But often you can make a program more robust by +finding the tokens in the string by hand, then converting the numbers +one by one. + +@menu +* Parsing of Integers:: Functions for conversion of integer values. +* Parsing of Floats:: Functions for conversion of floating-point + values. +@end menu + +@node Parsing of Integers +@subsection Parsing of Integers + +@pindex stdlib.h +@pindex wchar.h +The @samp{str} functions are declared in @file{stdlib.h} and those +beginning with @samp{wcs} are declared in @file{wchar.h}. One might +wonder about the use of @code{restrict} in the prototypes of the +functions in this section. It is seemingly useless but the @w{ISO C} +standard uses it (for the functions defined there) so we have to do it +as well. + +@comment stdlib.h +@comment ISO +@deftypefun {long int} strtol (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c strtol uses the thread-local pointer to the locale in effect, and +@c strtol_l loads the LC_NUMERIC locale data from it early on and once, +@c but if the locale is the global locale, and another thread calls +@c setlocale in a way that modifies the pointer to the LC_CTYPE locale +@c category, the behavior of e.g. 
IS*, TOUPPER will vary throughout the +@c execution of the function, because they re-read the locale data from +@c the given locale pointer. We solved this by documenting setlocale as +@c MT-Unsafe. +The @code{strtol} (``string-to-long'') function converts the initial +part of @var{string} to a signed integer, which is returned as a value +of type @code{long int}. + +This function attempts to decompose @var{string} as follows: + +@itemize @bullet +@item +A (possibly empty) sequence of whitespace characters. Which characters +are whitespace is determined by the @code{isspace} function +(@pxref{Classification of Characters}). These are discarded. + +@item +An optional plus or minus sign (@samp{+} or @samp{-}). + +@item +A nonempty sequence of digits in the radix specified by @var{base}. + +If @var{base} is zero, decimal radix is assumed unless the series of +digits begins with @samp{0} (specifying octal radix), or @samp{0x} or +@samp{0X} (specifying hexadecimal radix); in other words, the same +syntax used for integer constants in C. + +Otherwise @var{base} must have a value between @code{2} and @code{36}. +If @var{base} is @code{16}, the digits may optionally be preceded by +@samp{0x} or @samp{0X}. If base has no legal value the value returned +is @code{0l} and the global variable @code{errno} is set to @code{EINVAL}. + +@item +Any remaining characters in the string. If @var{tailptr} is not a null +pointer, @code{strtol} stores a pointer to this tail in +@code{*@var{tailptr}}. +@end itemize + +If the string is empty, contains only whitespace, or does not contain an +initial substring that has the expected syntax for an integer in the +specified @var{base}, no conversion is performed. In this case, +@code{strtol} returns a value of zero and the value stored in +@code{*@var{tailptr}} is the value of @var{string}. + +In a locale other than the standard @code{"C"} locale, this function +may recognize additional implementation-dependent syntax. 
+ +If the string has valid syntax for an integer but the value is not +representable because of overflow, @code{strtol} returns either +@code{LONG_MAX} or @code{LONG_MIN} (@pxref{Range of Type}), as +appropriate for the sign of the value. It also sets @code{errno} +to @code{ERANGE} to indicate there was overflow. + +You should not check for errors by examining the return value of +@code{strtol}, because the string might be a valid representation of +@code{0l}, @code{LONG_MAX}, or @code{LONG_MIN}. Instead, check whether +@var{tailptr} points to what you expect after the number +(e.g. @code{'\0'} if the string should end after the number). You also +need to clear @code{errno} before the call and check it afterward, in +case there was overflow. + +There is an example at the end of this section. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {long int} wcstol (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstol} function is equivalent to the @code{strtol} function +in nearly all aspects but handles wide character strings. + +The @code{wcstol} function was introduced in @w{Amendment 1} of @w{ISO C90}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun {unsigned long int} strtoul (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{strtoul} (``string-to-unsigned-long'') function is like +@code{strtol} except it converts to an @code{unsigned long int} value. +The syntax is the same as described above for @code{strtol}. The value +returned on overflow is @code{ULONG_MAX} (@pxref{Range of Type}). + +If @var{string} depicts a negative number, @code{strtoul} acts the same +as @code{strtol} but casts the result to an unsigned integer. 
That means +for example that @code{strtoul} on @code{"-1"} returns @code{ULONG_MAX} +and an input more negative than @code{LONG_MIN} returns +(@code{ULONG_MAX} + 1) / 2. + +@code{strtoul} sets @var{errno} to @code{EINVAL} if @var{base} is out of +range, or @code{ERANGE} on overflow. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {unsigned long int} wcstoul (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstoul} function is equivalent to the @code{strtoul} function +in nearly all aspects but handles wide character strings. + +The @code{wcstoul} function was introduced in @w{Amendment 1} of @w{ISO C90}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun {long long int} strtoll (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{strtoll} function is like @code{strtol} except that it returns +a @code{long long int} value, and accepts numbers with a correspondingly +larger range. + +If the string has valid syntax for an integer but the value is not +representable because of overflow, @code{strtoll} returns either +@code{LLONG_MAX} or @code{LLONG_MIN} (@pxref{Range of Type}), as +appropriate for the sign of the value. It also sets @code{errno} to +@code{ERANGE} to indicate there was overflow. + +The @code{strtoll} function was introduced in @w{ISO C99}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {long long int} wcstoll (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstoll} function is equivalent to the @code{strtoll} function +in nearly all aspects but handles wide character strings. + +The @code{wcstoll} function was introduced in @w{Amendment 1} of @w{ISO C90}. 
+@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun {long long int} strtoq (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@code{strtoq} (``string-to-quad-word'') is the BSD name for @code{strtoll}. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {long long int} wcstoq (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstoq} function is equivalent to the @code{strtoq} function +in nearly all aspects but handles wide character strings. + +The @code{wcstoq} function is a GNU extension. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun {unsigned long long int} strtoull (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{strtoull} function is related to @code{strtoll} the same way +@code{strtoul} is related to @code{strtol}. + +The @code{strtoull} function was introduced in @w{ISO C99}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {unsigned long long int} wcstoull (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstoull} function is equivalent to the @code{strtoull} function +in nearly all aspects but handles wide character strings. + +The @code{wcstoull} function was introduced in @w{Amendment 1} of @w{ISO C90}. +@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun {unsigned long long int} strtouq (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@code{strtouq} is the BSD name for @code{strtoull}. 
+@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {unsigned long long int} wcstouq (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstouq} function is equivalent to the @code{strtouq} function +in nearly all aspects but handles wide character strings. + +The @code{wcstouq} function is a GNU extension. +@end deftypefun + +@comment inttypes.h +@comment ISO +@deftypefun intmax_t strtoimax (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{strtoimax} function is like @code{strtol} except that it returns +an @code{intmax_t} value, and accepts numbers of a corresponding range. + +If the string has valid syntax for an integer but the value is not +representable because of overflow, @code{strtoimax} returns either +@code{INTMAX_MAX} or @code{INTMAX_MIN} (@pxref{Integers}), as +appropriate for the sign of the value. It also sets @code{errno} to +@code{ERANGE} to indicate there was overflow. + +See @ref{Integers} for a description of the @code{intmax_t} type. The +@code{strtoimax} function was introduced in @w{ISO C99}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun intmax_t wcstoimax (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstoimax} function is equivalent to the @code{strtoimax} function +in nearly all aspects but handles wide character strings. + +The @code{wcstoimax} function was introduced in @w{ISO C99}. 
+@end deftypefun + +@comment inttypes.h +@comment ISO +@deftypefun uintmax_t strtoumax (const char *restrict @var{string}, char **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{strtoumax} function is related to @code{strtoimax} +the same way that @code{strtoul} is related to @code{strtol}. + +See @ref{Integers} for a description of the @code{uintmax_t} type. The +@code{strtoumax} function was introduced in @w{ISO C99}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun uintmax_t wcstoumax (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}, int @var{base}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstoumax} function is equivalent to the @code{strtoumax} function +in nearly all aspects but handles wide character strings. + +The @code{wcstoumax} function was introduced in @w{ISO C99}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun {long int} atol (const char *@var{string}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function is similar to the @code{strtol} function with a @var{base} +argument of @code{10}, except that it need not detect overflow errors. +The @code{atol} function is provided mostly for compatibility with +existing code; using @code{strtol} is more robust. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun int atoi (const char *@var{string}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function is like @code{atol}, except that it returns an @code{int}. +The @code{atoi} function is also considered obsolete; use @code{strtol} +instead. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun {long long int} atoll (const char *@var{string}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function is similar to @code{atol}, except it returns a @code{long +long int}. + +The @code{atoll} function was introduced in @w{ISO C99}. 
It too is +obsolete (despite having just been added); use @code{strtoll} instead. +@end deftypefun + +All the functions mentioned in this section so far do not handle +alternative representations of characters as described in the locale +data. Some locales specify thousands separator and the way they have to +be used which can help to make large numbers more readable. To read +such numbers one has to use the @code{scanf} functions with the @samp{'} +flag. + +Here is a function which parses a string as a sequence of integers and +returns the sum of them: + +@smallexample +int +sum_ints_from_string (char *string) +@{ + int sum = 0; + + while (1) @{ + char *tail; + int next; + + /* @r{Skip whitespace by hand, to detect the end.} */ + while (isspace (*string)) string++; + if (*string == 0) + break; + + /* @r{There is more nonwhitespace,} */ + /* @r{so it ought to be another number.} */ + errno = 0; + /* @r{Parse it.} */ + next = strtol (string, &tail, 0); + /* @r{Add it in, if not overflow.} */ + if (errno) + printf ("Overflow\n"); + else + sum += next; + /* @r{Advance past it.} */ + string = tail; + @} + + return sum; +@} +@end smallexample + +@node Parsing of Floats +@subsection Parsing of Floats + +@pindex stdlib.h +The @samp{str} functions are declared in @file{stdlib.h} and those +beginning with @samp{wcs} are declared in @file{wchar.h}. One might +wonder about the use of @code{restrict} in the prototypes of the +functions in this section. It is seemingly useless but the @w{ISO C} +standard uses it (for the functions defined there) so we have to do it +as well. + +@comment stdlib.h +@comment ISO +@deftypefun double strtod (const char *restrict @var{string}, char **restrict @var{tailptr}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Besides the unsafe-but-ruled-safe locale uses, this uses a lot of +@c mpn, but it's all safe. 
+@c +@c round_and_return +@c get_rounding_mode ok +@c mpn_add_1 ok +@c mpn_rshift ok +@c MPN_ZERO ok +@c MPN2FLOAT -> mpn_construct_(float|double|long_double) ok +@c str_to_mpn +@c mpn_mul_1 -> umul_ppmm ok +@c mpn_add_1 ok +@c mpn_lshift_1 -> mpn_lshift ok +@c STRTOF_INTERNAL +@c MPN_VAR ok +@c SET_MANTISSA ok +@c STRNCASECMP ok, wide and narrow +@c round_and_return ok +@c mpn_mul ok +@c mpn_addmul_1 ok +@c ... mpn_sub +@c mpn_lshift ok +@c udiv_qrnnd ok +@c count_leading_zeros ok +@c add_ssaaaa ok +@c sub_ddmmss ok +@c umul_ppmm ok +@c mpn_submul_1 ok +The @code{strtod} (``string-to-double'') function converts the initial +part of @var{string} to a floating-point number, which is returned as a +value of type @code{double}. + +This function attempts to decompose @var{string} as follows: + +@itemize @bullet +@item +A (possibly empty) sequence of whitespace characters. Which characters +are whitespace is determined by the @code{isspace} function +(@pxref{Classification of Characters}). These are discarded. + +@item +An optional plus or minus sign (@samp{+} or @samp{-}). + +@item A floating point number in decimal or hexadecimal format. The +decimal format is: +@itemize @minus + +@item +A nonempty sequence of digits optionally containing a decimal-point +character---normally @samp{.}, but it depends on the locale +(@pxref{General Numeric}). + +@item +An optional exponent part, consisting of a character @samp{e} or +@samp{E}, an optional sign, and a sequence of digits. + +@end itemize + +The hexadecimal format is as follows: +@itemize @minus + +@item +A 0x or 0X followed by a nonempty sequence of hexadecimal digits +optionally containing a decimal-point character---normally @samp{.}, but +it depends on the locale (@pxref{General Numeric}). + +@item +An optional binary-exponent part, consisting of a character @samp{p} or +@samp{P}, an optional sign, and a sequence of digits. + +@end itemize + +@item +Any remaining characters in the string. 
If @var{tailptr} is not a null +pointer, a pointer to this tail of the string is stored in +@code{*@var{tailptr}}. +@end itemize + +If the string is empty, contains only whitespace, or does not contain an +initial substring that has the expected syntax for a floating-point +number, no conversion is performed. In this case, @code{strtod} returns +a value of zero and the value returned in @code{*@var{tailptr}} is the +value of @var{string}. + +In a locale other than the standard @code{"C"} or @code{"POSIX"} locales, +this function may recognize additional locale-dependent syntax. + +If the string has valid syntax for a floating-point number but the value +is outside the range of a @code{double}, @code{strtod} will signal +overflow or underflow as described in @ref{Math Error Reporting}. + +@code{strtod} recognizes four special input strings. The strings +@code{"inf"} and @code{"infinity"} are converted to @math{@infinity{}}, +or to the largest representable value if the floating-point format +doesn't support infinities. You can prepend a @code{"+"} or @code{"-"} +to specify the sign. Case is ignored when scanning these strings. + +The strings @code{"nan"} and @code{"nan(@var{chars@dots{}})"} are converted +to NaN. Again, case is ignored. If @var{chars@dots{}} are provided, they +are used in some unspecified fashion to select a particular +representation of NaN (there can be several). + +Since zero is a valid result as well as the value returned on error, you +should check for errors in the same way as for @code{strtol}, by +examining @var{errno} and @var{tailptr}. 
+@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun float strtof (const char *@var{string}, char **@var{tailptr}) +@comment stdlib.h +@comment ISO +@deftypefunx {long double} strtold (const char *@var{string}, char **@var{tailptr}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +These functions are analogous to @code{strtod}, but return @code{float} +and @code{long double} values respectively. They report errors in the +same way as @code{strtod}. @code{strtof} can be substantially faster +than @code{strtod}, but has less precision; conversely, @code{strtold} +can be much slower but has more precision (on systems where @code{long +double} is a separate type). + +These functions have been GNU extensions and are new to @w{ISO C99}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun double wcstod (const wchar_t *restrict @var{string}, wchar_t **restrict @var{tailptr}) +@comment wchar.h +@comment ISO +@deftypefunx float wcstof (const wchar_t *@var{string}, wchar_t **@var{tailptr}) +@comment wchar.h +@comment ISO +@deftypefunx {long double} wcstold (const wchar_t *@var{string}, wchar_t **@var{tailptr}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +The @code{wcstod}, @code{wcstof}, and @code{wcstold} functions are +equivalent in nearly all aspects to the @code{strtod}, @code{strtof}, and +@code{strtold} functions but they handle wide character strings. + +The @code{wcstod} function was introduced in @w{Amendment 1} of @w{ISO +C90}. The @code{wcstof} and @code{wcstold} functions were introduced in +@w{ISO C99}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun double atof (const char *@var{string}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function is similar to the @code{strtod} function, except that it +need not detect overflow and underflow errors. The @code{atof} function +is provided mostly for compatibility with existing code; using +@code{strtod} is more robust. 
+@end deftypefun + +@Theglibc{} also provides @samp{_l} versions of these functions, +which take an additional argument, the locale to use in conversion. + +See also @ref{Parsing of Integers}. + +@node Printing of Floats +@section Printing of Floats + +@pindex stdlib.h +The @samp{strfrom} functions are declared in @file{stdlib.h}. + +@comment stdlib.h +@comment ISO/IEC TS 18661-1 +@deftypefun int strfromd (char *restrict @var{string}, size_t @var{size}, const char *restrict @var{format}, double @var{value}) +@deftypefunx int strfromf (char *restrict @var{string}, size_t @var{size}, const char *restrict @var{format}, float @var{value}) +@deftypefunx int strfroml (char *restrict @var{string}, size_t @var{size}, const char *restrict @var{format}, long double @var{value}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@comment these functions depend on __printf_fp and __printf_fphex, which are +@comment AS-unsafe (ascuheap) and AC-unsafe (acsmem). +The functions @code{strfromd} (``string-from-double''), @code{strfromf} +(``string-from-float''), and @code{strfroml} (``string-from-long-double'') +convert the floating-point number @var{value} to a string of characters and +store them into the area pointed to by @var{string}. The conversion +writes at most @var{size} characters and respects the format specified by +@var{format}. + +The format string must start with the character @samp{%}. An optional +precision follows, which starts with a period, @samp{.}, and may be +followed by a decimal integer, representing the precision. If a decimal +integer is not specified after the period, the precision is taken to be +zero. The character @samp{*} is not allowed. Finally, the format string +ends with one of the following conversion specifiers: @samp{a}, @samp{A}, +@samp{e}, @samp{E}, @samp{f}, @samp{F}, @samp{g} or @samp{G} (@pxref{Table +of Output Conversions}). Invalid format strings result in undefined +behavior. 
+ +These functions return the number of characters that would have been +written to @var{string} had @var{size} been sufficiently large, not +counting the terminating null character. Thus, the null-terminated output +has been completely written if and only if the returned value is less than +@var{size}. + +These functions were introduced by ISO/IEC TS 18661-1. +@end deftypefun + +@node System V Number Conversion +@section Old-fashioned System V number-to-string functions + +The old @w{System V} C library provided three functions to convert +numbers to strings, with unusual and hard-to-use semantics. @Theglibc{} +also provides these functions and some natural extensions. + +These functions are only available in @theglibc{} and on systems descended +from AT&T Unix. Therefore, unless these functions do precisely what you +need, it is better to use @code{sprintf}, which is standard. + +All these functions are defined in @file{stdlib.h}. + +@comment stdlib.h +@comment SVID, Unix98 +@deftypefun {char *} ecvt (double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}) +@safety{@prelim{}@mtunsafe{@mtasurace{:ecvt}}@asunsafe{}@acsafe{}} +The function @code{ecvt} converts the floating-point number @var{value} +to a string with at most @var{ndigit} decimal digits. The +returned string contains no decimal point or sign. The first digit of +the string is non-zero (unless @var{value} is actually zero) and the +last digit is rounded to nearest. @code{*@var{decpt}} is set to the +index in the string of the first digit after the decimal point. +@code{*@var{neg}} is set to a nonzero value if @var{value} is negative, +zero otherwise. + +If @var{ndigit} decimal digits would exceed the precision of a +@code{double} it is reduced to a system-specific value. + +The returned string is statically allocated and overwritten by each call +to @code{ecvt}. + +If @var{value} is zero, it is implementation defined whether +@code{*@var{decpt}} is @code{0} or @code{1}. 
+ +For example: @code{ecvt (12.3, 5, &d, &n)} returns @code{"12300"} +and sets @var{d} to @code{2} and @var{n} to @code{0}. +@end deftypefun + +@comment stdlib.h +@comment SVID, Unix98 +@deftypefun {char *} fcvt (double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}) +@safety{@prelim{}@mtunsafe{@mtasurace{:fcvt}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The function @code{fcvt} is like @code{ecvt}, but @var{ndigit} specifies +the number of digits after the decimal point. If @var{ndigit} is less +than zero, @var{value} is rounded to the @math{@var{ndigit}+1}'th place to the +left of the decimal point. For example, if @var{ndigit} is @code{-1}, +@var{value} will be rounded to the nearest 10. If @var{ndigit} is +negative and larger than the number of digits to the left of the decimal +point in @var{value}, @var{value} will be rounded to one significant digit. + +If @var{ndigit} decimal digits would exceed the precision of a +@code{double} it is reduced to a system-specific value. + +The returned string is statically allocated and overwritten by each call +to @code{fcvt}. +@end deftypefun + +@comment stdlib.h +@comment SVID, Unix98 +@deftypefun {char *} gcvt (double @var{value}, int @var{ndigit}, char *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c gcvt calls sprintf, that ultimately calls vfprintf, which malloc()s +@c args_value if it's too large, but gcvt never exercises this path. +@code{gcvt} is functionally equivalent to @samp{sprintf(buf, "%*g", +ndigit, value)}. It is provided only for compatibility's sake. It +returns @var{buf}. + +If @var{ndigit} decimal digits would exceed the precision of a +@code{double} it is reduced to a system-specific value. +@end deftypefun + +As extensions, @theglibc{} provides versions of these three +functions that take @code{long double} arguments. 
+ +@comment stdlib.h +@comment GNU +@deftypefun {char *} qecvt (long double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}) +@safety{@prelim{}@mtunsafe{@mtasurace{:qecvt}}@asunsafe{}@acsafe{}} +This function is equivalent to @code{ecvt} except that it takes a +@code{long double} for the first parameter and that @var{ndigit} is +restricted by the precision of a @code{long double}. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun {char *} qfcvt (long double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}) +@safety{@prelim{}@mtunsafe{@mtasurace{:qfcvt}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function is equivalent to @code{fcvt} except that it +takes a @code{long double} for the first parameter and that @var{ndigit} is +restricted by the precision of a @code{long double}. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun {char *} qgcvt (long double @var{value}, int @var{ndigit}, char *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is equivalent to @code{gcvt} except that it takes a +@code{long double} for the first parameter and that @var{ndigit} is +restricted by the precision of a @code{long double}. +@end deftypefun + + +@cindex gcvt_r +The @code{ecvt} and @code{fcvt} functions, and their @code{long double} +equivalents, all return a string located in a static buffer which is +overwritten by the next call to the function. @Theglibc{} +provides another set of extended functions which write the converted +string into a user-supplied buffer. These have the conventional +@code{_r} suffix. + +@code{gcvt_r} is not necessary, because @code{gcvt} already uses a +user-supplied buffer. 
+ +@comment stdlib.h +@comment GNU +@deftypefun int ecvt_r (double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{ecvt_r} function is the same as @code{ecvt}, except +that it places its result into the user-specified buffer pointed to by +@var{buf}, with length @var{len}. The return value is @code{-1} in +case of an error and zero otherwise. + +This function is a GNU extension. +@end deftypefun + +@comment stdlib.h +@comment SVID, Unix98 +@deftypefun int fcvt_r (double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fcvt_r} function is the same as @code{fcvt}, except that it +places its result into the user-specified buffer pointed to by +@var{buf}, with length @var{len}. The return value is @code{-1} in +case of an error and zero otherwise. + +This function is a GNU extension. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int qecvt_r (long double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{qecvt_r} function is the same as @code{qecvt}, except +that it places its result into the user-specified buffer pointed to by +@var{buf}, with length @var{len}. The return value is @code{-1} in +case of an error and zero otherwise. + +This function is a GNU extension. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int qfcvt_r (long double @var{value}, int @var{ndigit}, int *@var{decpt}, int *@var{neg}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{qfcvt_r} function is the same as @code{qfcvt}, except +that it places its result into the user-specified buffer pointed to by +@var{buf}, with length @var{len}. 
The return value is @code{-1} in +case of an error and zero otherwise. + +This function is a GNU extension. +@end deftypefun diff --git a/REORG.TODO/manual/charset.texi b/REORG.TODO/manual/charset.texi new file mode 100644 index 0000000000..147d9c579a --- /dev/null +++ b/REORG.TODO/manual/charset.texi @@ -0,0 +1,2989 @@ +@node Character Set Handling, Locales, String and Array Utilities, Top +@c %MENU% Support for extended character sets +@chapter Character Set Handling + +@ifnottex +@macro cal{text} +\text\ +@end macro +@end ifnottex + +Character sets used in the early days of computing had only six, seven, +or eight bits for each character: there was never a case where more than +eight bits (one byte) were used to represent a single character. The +limitations of this approach became more apparent as more people +grappled with non-Roman character sets, where not all the characters +that make up a language's character set can be represented by @math{2^8} +choices. This chapter shows the functionality that was added to the C +library to support multiple character sets. + +@menu +* Extended Char Intro:: Introduction to Extended Characters. +* Charset Function Overview:: Overview about Character Handling + Functions. +* Restartable multibyte conversion:: Restartable multibyte conversion + Functions. +* Non-reentrant Conversion:: Non-reentrant Conversion Function. +* Generic Charset Conversion:: Generic Charset Conversion. +@end menu + + +@node Extended Char Intro +@section Introduction to Extended Characters + +A variety of solutions are available to overcome the differences between +character sets with a 1:1 relation between bytes and characters and +character sets with ratios of 2:1 or 4:1. The remainder of this +section gives a few examples to help understand the design decisions +made while developing the functionality of the @w{C library}. 
+ +@cindex internal representation +A distinction we have to make right away is between internal and +external representation. @dfn{Internal representation} means the +representation used by a program while keeping the text in memory. +External representations are used when text is stored or transmitted +through some communication channel. Examples of external +representations include files waiting in a directory to be +read and parsed. + +Traditionally there has been no difference between the two representations. +It was equally comfortable and useful to use the same single-byte +representation internally and externally. This comfort level decreases +with more and larger character sets. + +One of the problems to overcome with the internal representation is +handling text that is externally encoded using different character +sets. Assume a program that reads two texts and compares them using +some metric. The comparison can be usefully done only if the texts are +internally kept in a common format. + +@cindex wide character +For such a common format (@math{=} character set) eight bits are certainly +no longer enough. So the smallest entity will have to grow: @dfn{wide +characters} will now be used. Instead of one byte per character, two or +four will be used instead. (Three are not good to address in memory and +more than four bytes seem not to be necessary). + +@cindex Unicode +@cindex ISO 10646 +As shown in some other part of this manual, +@c !!! Ahem, wide char string functions are not yet covered -- drepper +a completely new family has been created of functions that can handle wide +character texts in memory. The most commonly used character sets for such +internal wide character representations are Unicode and @w{ISO 10646} +(also known as UCS for Universal Character Set). Unicode was originally +planned as a 16-bit character set; whereas, @w{ISO 10646} was designed to +be a 31-bit large code space. The two standards are practically identical. 
+They have the same character repertoire and code table, but Unicode specifies +added semantics. At the moment, only characters in the first @code{0x10000} +code positions (the so-called Basic Multilingual Plane, BMP) have been +assigned, but the assignment of more specialized characters outside this +16-bit space is already in progress. A number of encodings have been +defined for Unicode and @w{ISO 10646} characters: +@cindex UCS-2 +@cindex UCS-4 +@cindex UTF-8 +@cindex UTF-16 +UCS-2 is a 16-bit word that can only represent characters +from the BMP, UCS-4 is a 32-bit word that can represent any Unicode +and @w{ISO 10646} character, UTF-8 is an ASCII compatible encoding where +ASCII characters are represented by ASCII bytes and non-ASCII characters +by sequences of 2-6 non-ASCII bytes, and finally UTF-16 is an extension +of UCS-2 in which pairs of certain UCS-2 words can be used to encode +non-BMP characters up to @code{0x10ffff}. + +To represent wide characters the @code{char} type is not suitable. For +this reason the @w{ISO C} standard introduces a new type that is +designed to keep one character of a wide character string. To maintain +the similarity there is also a type corresponding to @code{int} for +those functions that take a single wide character. + +@comment stddef.h +@comment ISO +@deftp {Data type} wchar_t +This data type is used as the base type for wide character strings. +In other words, arrays of objects of this type are the equivalent of +@code{char[]} for multibyte character strings. The type is defined in +@file{stddef.h}. + +The @w{ISO C90} standard, where @code{wchar_t} was introduced, does not +say anything specific about the representation. It only requires that +this type is capable of storing all elements of the basic character set. +Therefore it would be legitimate to define @code{wchar_t} as @code{char}, +which might make sense for embedded systems.
+ +But in @theglibc{} @code{wchar_t} is always 32 bits wide and, therefore, +capable of representing all UCS-4 values and, therefore, covering all of +@w{ISO 10646}. Some Unix systems define @code{wchar_t} as a 16-bit type +and thereby follow Unicode very strictly. This definition is perfectly +fine with the standard, but it also means that to represent all +characters from Unicode and @w{ISO 10646} one has to use UTF-16 surrogate +characters, which is in fact a multi-wide-character encoding. But +resorting to multi-wide-character encoding contradicts the purpose of the +@code{wchar_t} type. +@end deftp + +@comment wchar.h +@comment ISO +@deftp {Data type} wint_t +@code{wint_t} is a data type used for parameters and variables that +contain a single wide character. As the name suggests this type is the +equivalent of @code{int} when using the normal @code{char} strings. The +types @code{wchar_t} and @code{wint_t} often have the same +representation if their size is 32 bits wide but if @code{wchar_t} is +defined as @code{char} the type @code{wint_t} must be defined as +@code{int} due to the parameter promotion. + +@pindex wchar.h +This type is defined in @file{wchar.h} and was introduced in +@w{Amendment 1} to @w{ISO C90}. +@end deftp + +As for the @code{char} data type, macros are available for +specifying the minimum and maximum value representable in an object of +type @code{wchar_t}. + +@comment wchar.h +@comment ISO +@deftypevr Macro wint_t WCHAR_MIN +The macro @code{WCHAR_MIN} evaluates to the minimum value representable +by an object of type @code{wchar_t}. + +This macro was introduced in @w{Amendment 1} to @w{ISO C90}. +@end deftypevr + +@comment wchar.h +@comment ISO +@deftypevr Macro wint_t WCHAR_MAX +The macro @code{WCHAR_MAX} evaluates to the maximum value representable +by an object of type @code{wchar_t}. + +This macro was introduced in @w{Amendment 1} to @w{ISO C90}.
+@end deftypevr + +Another special wide character value is the equivalent to @code{EOF}. + +@comment wchar.h +@comment ISO +@deftypevr Macro wint_t WEOF +The macro @code{WEOF} evaluates to a constant expression of type +@code{wint_t} whose value is different from any member of the extended +character set. + +@code{WEOF} need not be the same value as @code{EOF} and unlike +@code{EOF} it also need @emph{not} be negative. In other words, sloppy +code like + +@smallexample +@{ + int c; + @dots{} + while ((c = getc (fp)) >= 0) + @dots{} +@} +@end smallexample + +@noindent +has to be rewritten to use @code{WEOF} explicitly when wide characters +are used: + +@smallexample +@{ + wint_t c; + @dots{} + while ((c = getwc (fp)) != WEOF) + @dots{} +@} +@end smallexample + +@pindex wchar.h +This macro was introduced in @w{Amendment 1} to @w{ISO C90} and is +defined in @file{wchar.h}. +@end deftypevr + + +These internal representations present problems when it comes to storage +and transmittal. Because each single wide character consists of more +than one byte, they are affected by byte-ordering. Thus, machines with +different endiannesses would see different values when accessing the same +data. This byte ordering concern also applies for communication protocols +that are all byte-based and therefore require that the sender has to +decide about splitting the wide character in bytes. A last (but not least +important) point is that wide characters often require more storage space +than a customized byte-oriented character set. + +@cindex multibyte character +@cindex EBCDIC +For all the above reasons, an external encoding that is different from +the internal encoding is often used if the latter is UCS-2 or UCS-4. +The external encoding is byte-based and can be chosen appropriately for +the environment and for the texts to be handled.
A variety of different +character sets can be used for this external encoding (information that +will not be exhaustively presented here--instead, a description of the +major groups will suffice). All of the ASCII-based character sets +fulfill one requirement: they are ``filesystem safe.'' This means that +the character @code{'/'} is used in the encoding @emph{only} to +represent itself. Things are a bit different for character sets like +EBCDIC (Extended Binary Coded Decimal Interchange Code, a character set +family used by IBM), but if the operating system does not understand +EBCDIC directly the parameters to system calls have to be converted +first anyhow. + +@itemize @bullet +@item +The simplest character sets are single-byte character sets. There can +be only up to 256 characters (for @w{8 bit} character sets), which is +not sufficient to cover all languages but might be sufficient to handle +a specific text. Handling of @w{8 bit} character sets is simple. This +is not true for other kinds presented later, and therefore, the +application one uses might require the use of @w{8 bit} character sets. + +@cindex ISO 2022 +@item +The @w{ISO 2022} standard defines a mechanism for extended character +sets where one character @emph{can} be represented by more than one +byte. This is achieved by associating a state with the text. +Characters that can be used to change the state can be embedded in the +text. Each byte in the text might have a different interpretation in each +state. The state might even influence whether a given byte stands for a +character on its own or whether it has to be combined with some more +bytes. + +@cindex EUC +@cindex Shift_JIS +@cindex SJIS +In most uses of @w{ISO 2022} the defined character sets do not allow +state changes that cover more than the next character. This has the +big advantage that whenever one can identify the beginning of the byte +sequence of a character one can interpret a text correctly.
Examples of +character sets using this policy are the various EUC character sets +(used by Sun's operating systems, EUC-JP, EUC-KR, EUC-TW, and EUC-CN) +or Shift_JIS (SJIS, a Japanese encoding). + +But there are also character sets using a state that is valid for more +than one character and has to be changed by another byte sequence. +Examples of this are ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN. + +@item +@cindex ISO 6937 +Early attempts to fix 8 bit character sets for other languages using the +Roman alphabet led to character sets like @w{ISO 6937}. Here bytes +representing characters like the acute accent do not produce output +themselves: one has to combine them with other characters to get the +desired result. For example, one writes the byte sequence @code{0xc2 0x61} +(non-spacing acute accent, followed by lower-case `a') to get the ``small +a with acute'' character. To get the acute accent character on its own, +one has to write @code{0xc2 0x20} (the non-spacing acute followed by a +space). + +Character sets like @w{ISO 6937} are used in some embedded systems such +as teletex. + +@item +@cindex UTF-8 +Instead of converting the Unicode or @w{ISO 10646} text used internally, +it is often also sufficient to simply use an encoding different than +UCS-2/UCS-4. The Unicode and @w{ISO 10646} standards even specify such an +encoding: UTF-8. This encoding is able to represent all of @w{ISO +10646} 31 bits in a byte string of length one to six. + +@cindex UTF-7 +There were a few other attempts to encode @w{ISO 10646} such as UTF-7, +but UTF-8 is today the only encoding that should be used. In fact, with +any luck UTF-8 will soon be the only external encoding that has to be +supported. It proves to be universally usable and its only disadvantage +is that it favors Roman languages by making the byte string +representation of other scripts (Cyrillic, Greek, Asian scripts) longer +than necessary if using a specific character set for these scripts.
+Methods like the Unicode compression scheme can alleviate these +problems. +@end itemize + +The question remaining is: how to select the character set or encoding +to use. The answer: you cannot decide about it yourself, it is decided +by the developers of the system or the majority of the users. Since the +goal is interoperability one has to use whatever the other people one +works with use. If there are no constraints, the selection is based on +the requirements the expected circle of users will have. In other words, +if a project is expected to be used in only, say, Russia it is fine to use +KOI8-R or a similar character set. But if at the same time people from, +say, Greece are participating one should use a character set that allows +all people to collaborate. + +The most widely useful solution seems to be: go with the most general +character set, namely @w{ISO 10646}. Use UTF-8 as the external encoding +and problems about users not being able to use their own language +adequately are a thing of the past. + +One final comment about the choice of the wide character representation +is necessary at this point. We have said above that the natural choice +is using Unicode or @w{ISO 10646}. This is not required, but at least +encouraged, by the @w{ISO C} standard. The standard defines at least a +macro @code{__STDC_ISO_10646__} that is only defined on systems where +the @code{wchar_t} type encodes @w{ISO 10646} characters. If this +symbol is not defined one should avoid making assumptions about the wide +character representation. If the programmer uses only the functions +provided by the C library to handle wide character strings there should +be no compatibility problems with other systems. + +@node Charset Function Overview +@section Overview about Character Handling Functions + +A Unix @w{C library} contains three different sets of functions in two +families to handle character set conversion. 
One of the function families +(the most commonly used) is specified in the @w{ISO C90} standard and, +therefore, is portable even beyond the Unix world. Unfortunately this +family is the least useful one. These functions should be avoided +whenever possible, especially when developing libraries (as opposed to +applications). + +The second family of functions got introduced in the early Unix standards +(XPG2) and is still part of the latest and greatest Unix standard: +@w{Unix 98}. It is also the most powerful and useful set of functions. +But we will start with the functions defined in @w{Amendment 1} to +@w{ISO C90}. + +@node Restartable multibyte conversion +@section Restartable Multibyte Conversion Functions + +The @w{ISO C} standard defines functions to convert strings from a +multibyte representation to wide character strings. There are a number +of peculiarities: + +@itemize @bullet +@item +The character set assumed for the multibyte encoding is not specified +as an argument to the functions. Instead the character set specified by +the @code{LC_CTYPE} category of the current locale is used; see +@ref{Locale Categories}. + +@item +The functions handling more than one character at a time require NUL +terminated strings as the argument (i.e., converting blocks of text +does not work unless one can add a NUL byte at an appropriate place). +@Theglibc{} contains some extensions to the standard that allow +specifying a size, but basically they also expect terminated strings. +@end itemize + +Despite these limitations the @w{ISO C} functions can be used in many +contexts. In graphical user interfaces, for instance, it is not +uncommon to have functions that require text to be displayed in a wide +character string if the text is not simple ASCII. The text itself might +come from a file with translations and the user should decide about the +current locale, which determines the translation and therefore also the +external encoding used. 
In such a situation (and many others) the +functions described here are perfect. If more freedom while performing +the conversion is necessary take a look at the @code{iconv} functions +(@pxref{Generic Charset Conversion}). + +@menu +* Selecting the Conversion:: Selecting the conversion and its properties. +* Keeping the state:: Representing the state of the conversion. +* Converting a Character:: Converting Single Characters. +* Converting Strings:: Converting Multibyte and Wide Character + Strings. +* Multibyte Conversion Example:: A Complete Multibyte Conversion Example. +@end menu + +@node Selecting the Conversion +@subsection Selecting the conversion and its properties + +We already said above that the currently selected locale for the +@code{LC_CTYPE} category decides the conversion that is performed +by the functions we are about to describe. Each locale uses its own +character set (given as an argument to @code{localedef}) and this is the +one assumed as the external multibyte encoding. The wide character +set is always UCS-4 in @theglibc{}. + +A characteristic of each multibyte character set is the maximum number +of bytes that can be necessary to represent one character. This +information is quite important when writing code that uses the +conversion functions (as shown in the examples below). +The @w{ISO C} standard defines two macros that provide this information. + + +@comment limits.h +@comment ISO +@deftypevr Macro int MB_LEN_MAX +@code{MB_LEN_MAX} specifies the maximum number of bytes in the multibyte +sequence for a single character in any of the supported locales. It is +a compile-time constant and is defined in @file{limits.h}. +@pindex limits.h +@end deftypevr + +@comment stdlib.h +@comment ISO +@deftypevr Macro int MB_CUR_MAX +@code{MB_CUR_MAX} expands into a positive integer expression that is the +maximum number of bytes in a multibyte character in the current locale. +The value is never greater than @code{MB_LEN_MAX}. 
Unlike +@code{MB_LEN_MAX} this macro need not be a compile-time constant, and in +@theglibc{} it is not. + +@pindex stdlib.h +@code{MB_CUR_MAX} is defined in @file{stdlib.h}. +@end deftypevr + +Two different macros are necessary since strictly @w{ISO C90} compilers +do not allow variable length array definitions, but still it is desirable +to avoid dynamic allocation. This incomplete piece of code shows the +problem: + +@smallexample +@{ + char buf[MB_LEN_MAX]; + ssize_t len = 0; + + while (! feof (fp)) + @{ + fread (&buf[len], 1, MB_CUR_MAX - len, fp); + /* @r{@dots{} process} buf */ + len -= used; + @} +@} +@end smallexample + +The code in the inner loop is expected to have always enough bytes in +the array @var{buf} to convert one multibyte character. The array +@var{buf} has to be sized statically since many compilers do not allow a +variable size. The @code{fread} call makes sure that @code{MB_CUR_MAX} +bytes are always available in @var{buf}. Note that it isn't +a problem if @code{MB_CUR_MAX} is not a compile-time constant. + + +@node Keeping the state +@subsection Representing the state of the conversion + +@cindex stateful +In the introduction of this chapter it was said that certain character +sets use a @dfn{stateful} encoding. That is, the encoded values depend +in some way on the previous bytes in the text. + +Since the conversion functions allow converting a text in more than one +step we must have a way to pass this information from one call of the +functions to another. + +@comment wchar.h +@comment ISO +@deftp {Data type} mbstate_t +@cindex shift state +A variable of type @code{mbstate_t} can contain all the information +about the @dfn{shift state} needed from one call to a conversion +function to another. + +@pindex wchar.h +@code{mbstate_t} is defined in @file{wchar.h}. It was introduced in +@w{Amendment 1} to @w{ISO C90}. 
+@end deftp + +To use objects of type @code{mbstate_t} the programmer has to define such +objects (normally as local variables on the stack) and pass a pointer to +the object to the conversion functions. This way the conversion function +can update the object if the current multibyte character set is stateful. + +There is no specific function or initializer to put the state object in +any specific state. The rules are that the object should always +represent the initial state before the first use, and this is achieved by +clearing the whole variable with code such as follows: + +@smallexample +@{ + mbstate_t state; + memset (&state, '\0', sizeof (state)); + /* @r{from now on @var{state} can be used.} */ + @dots{} +@} +@end smallexample + +When using the conversion functions to generate output it is often +necessary to test whether the current state corresponds to the initial +state. This is necessary, for example, to decide whether to emit +escape sequences to set the state to the initial state at certain +sequence points. Communication protocols often require this. + +@comment wchar.h +@comment ISO +@deftypefun int mbsinit (const mbstate_t *@var{ps}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c ps is dereferenced once, unguarded. This would call for @mtsrace:ps, +@c but since a single word-sized field is (atomically) accessed, any +@c race here would be harmless. Other functions that take an optional +@c mbstate_t* argument named ps are marked with @mtasurace:<func>/!ps, +@c to indicate that the function uses a static buffer if ps is NULL. +@c These could also have been marked with @mtsrace:ps, but we'll omit +@c that for brevity, for it's somewhat redundant with the @mtasurace. +The @code{mbsinit} function determines whether the state object pointed +to by @var{ps} is in the initial state. If @var{ps} is a null pointer or +the object is in the initial state the return value is nonzero. Otherwise +it is zero. 
+ +@pindex wchar.h +@code{mbsinit} was introduced in @w{Amendment 1} to @w{ISO C90} and is +declared in @file{wchar.h}. +@end deftypefun + +Code using @code{mbsinit} often looks similar to this: + +@c Fix the example to explicitly say how to generate the escape sequence +@c to restore the initial state. +@smallexample +@{ + mbstate_t state; + memset (&state, '\0', sizeof (state)); + /* @r{Use @var{state}.} */ + @dots{} + if (! mbsinit (&state)) + @{ + /* @r{Emit code to return to initial state.} */ + const wchar_t empty[] = L""; + const wchar_t *srcp = empty; + wcsrtombs (outbuf, &srcp, outbuflen, &state); + @} + @dots{} +@} +@end smallexample + +The code to emit the escape sequence to get back to the initial state is +interesting. The @code{wcsrtombs} function can be used to determine the +necessary output code (@pxref{Converting Strings}). Please note that with +@theglibc{} it is not necessary to perform this extra action for the +conversion from multibyte text to wide character text since the wide +character encoding is not stateful. But there is nothing mentioned in +any standard that prohibits making @code{wchar_t} use a stateful +encoding. + +@node Converting a Character +@subsection Converting Single Characters + +The most fundamental of the conversion functions are those dealing with +single characters. Please note that this does not always mean single +bytes. But since there is very often a subset of the multibyte +character set that consists of single byte sequences, there are +functions to help with converting bytes. Frequently, ASCII is a subset +of the multibyte character set. In such a scenario, each ASCII character +stands for itself, and all other characters have at least a first byte +that is beyond the range @math{0} to @math{127}. 
+ +@comment wchar.h +@comment ISO +@deftypefun wint_t btowc (int @var{c}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c Calls btowc_fct or __fct; reads from locale, and from the +@c get_gconv_fcts result multiple times. get_gconv_fcts calls +@c __wcsmbs_load_conv to initialize the ctype if it's null. +@c wcsmbs_load_conv takes a non-recursive wrlock before allocating +@c memory for the fcts structure, initializing it, and then storing it +@c in the locale object. The initialization involves dlopening and a +@c lot more. +The @code{btowc} function (``byte to wide character'') converts a valid +single byte character @var{c} in the initial shift state into the wide +character equivalent using the conversion rules from the currently +selected locale of the @code{LC_CTYPE} category. + +If @code{(unsigned char) @var{c}} is not a valid single byte multibyte +character or if @var{c} is @code{EOF}, the function returns @code{WEOF}. + +Please note the restriction of @var{c} being tested for validity only in +the initial shift state. No @code{mbstate_t} object is used from +which the state information is taken, and the function also does not use +any static state. + +@pindex wchar.h +The @code{btowc} function was introduced in @w{Amendment 1} to @w{ISO C90} +and is declared in @file{wchar.h}. +@end deftypefun + +Despite the limitation that the single byte value is always interpreted +in the initial state, this function is actually useful most of the time. +Most character sets are either entirely single-byte character sets or they +are extensions to ASCII.
But then it is possible to write code like this +(not that this specific example is very useful): + +@smallexample +wchar_t * +itow (unsigned long int val) +@{ + static wchar_t buf[30]; + wchar_t *wcp = &buf[29]; + *wcp = L'\0'; + while (val != 0) + @{ + *--wcp = btowc ('0' + val % 10); + val /= 10; + @} + if (wcp == &buf[29]) + *--wcp = L'0'; + return wcp; +@} +@end smallexample + +Why is it necessary to use such a complicated implementation and not +simply cast @code{'0' + val % 10} to a wide character? The answer is +that there is no guarantee that one can perform this kind of arithmetic +on the character of the character set used for @code{wchar_t} +representation. In other situations the bytes are not constant at +compile time and so the compiler cannot do the work. In situations like +this, using @code{btowc} is required. + +@noindent +There is also a function for the conversion in the other direction. + +@comment wchar.h +@comment ISO +@deftypefun int wctob (wint_t @var{c}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{wctob} function (``wide character to byte'') takes as the +parameter a valid wide character. If the multibyte representation for +this character in the initial state is exactly one byte long, the return +value of this function is this character. Otherwise the return value is +@code{EOF}. + +@pindex wchar.h +@code{wctob} was introduced in @w{Amendment 1} to @w{ISO C90} and +is declared in @file{wchar.h}. +@end deftypefun + +There are more general functions to convert single characters from +multibyte representation to wide characters and vice versa. These +functions pose no limit on the length of the multibyte representation +and they also do not require it to be in the initial state. 
+ +@comment wchar.h +@comment ISO +@deftypefun size_t mbrtowc (wchar_t *restrict @var{pwc}, const char *restrict @var{s}, size_t @var{n}, mbstate_t *restrict @var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:mbrtowc/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@cindex stateful +The @code{mbrtowc} function (``multibyte restartable to wide +character'') converts the next multibyte character in the string pointed +to by @var{s} into a wide character and stores it in the wide character +string pointed to by @var{pwc}. The conversion is performed according +to the locale currently selected for the @code{LC_CTYPE} category. If +the conversion for the character set used in the locale requires a state, +the multibyte string is interpreted in the state represented by the +object pointed to by @var{ps}. If @var{ps} is a null pointer, a static, +internal state variable used only by the @code{mbrtowc} function is +used. + +If the next multibyte character corresponds to the NUL wide character, +the return value of the function is @math{0} and the state object is +afterwards in the initial state. If the next @var{n} or fewer bytes +form a correct multibyte character, the return value is the number of +bytes starting from @var{s} that form the multibyte character. The +conversion state is updated according to the bytes consumed in the +conversion. In both cases the wide character (either the @code{L'\0'} +or the one found in the conversion) is stored in the string pointed to +by @var{pwc} if @var{pwc} is not null. + +If the first @var{n} bytes of the multibyte string possibly form a valid +multibyte character but there are more than @var{n} bytes needed to +complete it, the return value of the function is @code{(size_t) -2} and +no value is stored. 
Please note that this can happen even if @var{n} +has a value greater than or equal to @code{MB_CUR_MAX} since the input +might contain redundant shift sequences. + +If the first @var{n} bytes of the multibyte string cannot possibly form +a valid multibyte character, no value is stored, the global variable +@code{errno} is set to the value @code{EILSEQ}, and the function returns +@code{(size_t) -1}. The conversion state is afterwards undefined. + +@pindex wchar.h +@code{mbrtowc} was introduced in @w{Amendment 1} to @w{ISO C90} and +is declared in @file{wchar.h}. +@end deftypefun + +Use of @code{mbrtowc} is straightforward. A function that copies a +multibyte string into a wide character string while at the same time +converting all lowercase characters into uppercase could look like this +(this is not the final version, just an example; it has no error +checking, and sometimes leaks memory): + +@smallexample +wchar_t * +mbstouwcs (const char *s) +@{ + size_t len = strlen (s); + wchar_t *result = malloc ((len + 1) * sizeof (wchar_t)); + wchar_t *wcp = result; + wchar_t tmp[1]; + mbstate_t state; + size_t nbytes; + + memset (&state, '\0', sizeof (state)); + while ((nbytes = mbrtowc (tmp, s, len, &state)) > 0) + @{ + if (nbytes >= (size_t) -2) + /* Invalid input string. */ + return NULL; + *wcp++ = towupper (tmp[0]); + len -= nbytes; + s += nbytes; + @} + return result; +@} +@end smallexample + +The use of @code{mbrtowc} should be clear. A single wide character is +stored in @code{@var{tmp}[0]}, and the number of consumed bytes is stored +in the variable @var{nbytes}. If the conversion is successful, the +uppercase variant of the wide character is stored in the @var{result} +array and the pointer to the input string and the number of available +bytes are adjusted. + +The only non-obvious thing about @code{mbstouwcs} might be the way memory +is allocated for the result.
The above code uses the fact that there +can never be more wide characters in the converted result than there are +bytes in the multibyte input string. This method yields a pessimistic +guess about the size of the result, and if many wide character strings +have to be constructed this way or if the strings are long, the extra +memory required to be allocated because the input string contains +multibyte characters might be significant. The allocated memory block can +be resized to the correct size before returning it, but a better solution +might be to allocate just the right amount of space for the result right +away. Unfortunately there is no function to compute the length of the wide +character string directly from the multibyte string. There is, however, a +function that does part of the work. + +@comment wchar.h +@comment ISO +@deftypefun size_t mbrlen (const char *restrict @var{s}, size_t @var{n}, mbstate_t *@var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:mbrlen/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{mbrlen} function (``multibyte restartable length'') computes +the number of at most @var{n} bytes starting at @var{s}, which form the +next valid and complete multibyte character. + +If the next multibyte character corresponds to the NUL wide character, +the return value is @math{0}. If the next @var{n} bytes form a valid +multibyte character, the number of bytes belonging to this multibyte +character byte sequence is returned. + +If the first @var{n} bytes possibly form a valid multibyte +character but the character is incomplete, the return value is +@code{(size_t) -2}. Otherwise the multibyte character sequence is invalid +and the return value is @code{(size_t) -1}. + +The multibyte sequence is interpreted in the state represented by the +object pointed to by @var{ps}. If @var{ps} is a null pointer, a state +object local to @code{mbrlen} is used. 
+ +@pindex wchar.h +@code{mbrlen} was introduced in @w{Amendment 1} to @w{ISO C90} and +is declared in @file{wchar.h}. +@end deftypefun + +The attentive reader now will note that @code{mbrlen} can be implemented +as + +@smallexample +mbrtowc (NULL, s, n, ps != NULL ? ps : &internal) +@end smallexample + +This is true and in fact is mentioned in the official specification. +How can this function be used to determine the length of the wide +character string created from a multibyte character string? It is not +directly usable, but we can define a function @code{mbslen} using it: + +@smallexample +size_t +mbslen (const char *s) +@{ + mbstate_t state; + size_t result = 0; + size_t nbytes; + memset (&state, '\0', sizeof (state)); + while ((nbytes = mbrlen (s, MB_LEN_MAX, &state)) > 0) + @{ + if (nbytes >= (size_t) -2) + /* @r{Something is wrong.} */ + return (size_t) -1; + s += nbytes; + ++result; + @} + return result; +@} +@end smallexample + +This function simply calls @code{mbrlen} for each multibyte character +in the string and counts the number of function calls. Please note that +we here use @code{MB_LEN_MAX} as the size argument in the @code{mbrlen} +call. This is acceptable since a) this value is larger than the length of +the longest multibyte character sequence and b) we know that the string +@var{s} ends with a NUL byte, which cannot be part of any other multibyte +character sequence but the one representing the NUL wide character. +Therefore, the @code{mbrlen} function will never read invalid memory. + +Now that this function is available (just to make this clear, this +function is @emph{not} part of @theglibc{}) we can compute the +number of wide characters required to store the converted multibyte +character string @var{s} using + +@smallexample +wcs_bytes = (mbslen (s) + 1) * sizeof (wchar_t); +@end smallexample + +Please note that the @code{mbslen} function is quite inefficient. 
The +implementation of @code{mbstouwcs} with @code{mbslen} would have to +perform the conversion of the multibyte character input string twice, and +this conversion might be quite expensive. So it is necessary to think +about the consequences of using the easier but imprecise method before +doing the work twice. + +@comment wchar.h +@comment ISO +@deftypefun size_t wcrtomb (char *restrict @var{s}, wchar_t @var{wc}, mbstate_t *restrict @var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:wcrtomb/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c wcrtomb uses a static, non-thread-local unguarded state variable when +@c PS is NULL. When a state is passed in, and it's not used +@c concurrently in other threads, this function behaves safely as long +@c as gconv modules don't bring MT safety issues of their own. +@c Attempting to load gconv modules or to build conversion chains in +@c signal handlers may encounter gconv databases or caches in a +@c partially-updated state, and asynchronous cancellation may leave them +@c in such states, besides leaking the lock that guards them. 
+@c get_gconv_fcts ok +@c wcsmbs_load_conv ok +@c norm_add_slashes ok +@c wcsmbs_getfct ok +@c gconv_find_transform ok +@c gconv_read_conf (libc_once) +@c gconv_lookup_cache ok +@c find_module_idx ok +@c find_module ok +@c gconv_find_shlib (ok) +@c ->init_fct (assumed ok) +@c gconv_get_builtin_trans ok +@c gconv_release_step ok +@c do_lookup_alias ok +@c find_derivation ok +@c derivation_lookup ok +@c increment_counter ok +@c gconv_find_shlib ok +@c step->init_fct (assumed ok) +@c gen_steps ok +@c gconv_find_shlib ok +@c dlopen (presumed ok) +@c dlsym (presumed ok) +@c step->init_fct (assumed ok) +@c step->end_fct (assumed ok) +@c gconv_get_builtin_trans ok +@c gconv_release_step ok +@c add_derivation ok +@c gconv_close_transform ok +@c gconv_release_step ok +@c step->end_fct (assumed ok) +@c gconv_release_shlib ok +@c dlclose (presumed ok) +@c gconv_release_cache ok +@c ->tomb->__fct (assumed ok) +The @code{wcrtomb} function (``wide character restartable to +multibyte'') converts a single wide character into a multibyte string +corresponding to that wide character. + +If @var{s} is a null pointer, the function resets the state stored in +the object pointed to by @var{ps} (or the internal @code{mbstate_t} +object) to the initial state. This can also be achieved by a call like +this: + +@smallexample +wcrtomb (temp_buf, L'\0', ps) +@end smallexample + +@noindent +since, if @var{s} is a null pointer, @code{wcrtomb} performs as if it +writes into an internal buffer, which is guaranteed to be large enough. + +If @var{wc} is the NUL wide character, @code{wcrtomb} emits, if +necessary, a shift sequence to get the state @var{ps} into the initial +state followed by a single NUL byte, which is stored in the string +@var{s}. + +Otherwise a byte sequence (possibly including shift sequences) is written +into the string @var{s}.
This only happens if @var{wc} is a valid wide +character (i.e., it has a multibyte representation in the character set +selected by the @code{LC_CTYPE} category of the current locale). If @var{wc} is not a +valid wide character, nothing is stored in the string @var{s}, +@code{errno} is set to @code{EILSEQ}, the conversion state in @var{ps} +is undefined and the return value is @code{(size_t) -1}. + +If no error occurred the function returns the number of bytes stored in +the string @var{s}. This includes all bytes representing shift +sequences. + +One word about the interface of the function: there is no parameter +specifying the length of the array @var{s}. Instead the function +assumes that there are at least @code{MB_CUR_MAX} bytes available since +this is the maximum length of any byte sequence representing a single +character. So the caller has to make sure that there is enough space +available, otherwise buffer overruns can occur. + +@pindex wchar.h +@code{wcrtomb} was introduced in @w{Amendment 1} to @w{ISO C90} and is +declared in @file{wchar.h}. +@end deftypefun + +Using @code{wcrtomb} is as easy as using @code{mbrtowc}. The following +example appends a wide character string to a multibyte character string. +Again, the code is not really useful (or correct), it is simply here to +demonstrate the use and some problems.
+ +@smallexample +char * +mbscatwcs (char *s, size_t len, const wchar_t *ws) +@{ + mbstate_t state; + /* @r{Find the end of the existing string.} */ + char *wp = strchr (s, '\0'); + len -= wp - s; + memset (&state, '\0', sizeof (state)); + do + @{ + size_t nbytes; + if (len < MB_CUR_MAX) + @{ + /* @r{We cannot guarantee that the next} + @r{character fits into the buffer, so} + @r{return an error.} */ + errno = E2BIG; + return NULL; + @} + nbytes = wcrtomb (wp, *ws, &state); + if (nbytes == (size_t) -1) + /* @r{Error in the conversion.} */ + return NULL; + len -= nbytes; + wp += nbytes; + @} + while (*ws++ != L'\0'); + return s; +@} +@end smallexample + +First the function has to find the end of the string currently in the +array @var{s}. The @code{strchr} call does this very efficiently since a +requirement for multibyte character representations is that the NUL byte +is never used except to represent itself (and in this context, the end +of the string). + +After initializing the state object the loop is entered where the first +task is to make sure there is enough room in the array @var{s}. We +abort if there are not at least @code{MB_CUR_MAX} bytes available. This +is not always optimal but we have no other choice. We might have less +than @code{MB_CUR_MAX} bytes available but the next multibyte character +might also be only one byte long. At the time the @code{wcrtomb} call +returns it is too late to decide whether the buffer was large enough. If +this solution is unsuitable, there is a very slow but more accurate +solution.
+ +@smallexample + @dots{} + if (len < MB_CUR_MAX) + @{ + mbstate_t temp_state; + memcpy (&temp_state, &state, sizeof (state)); + if (wcrtomb (NULL, *ws, &temp_state) > len) + @{ + /* @r{We cannot guarantee that the next} + @r{character fits into the buffer, so} + @r{return an error.} */ + errno = E2BIG; + return NULL; + @} + @} + @dots{} +@end smallexample + +Here we perform the conversion that might overflow the buffer so that +we are afterwards in the position to make an exact decision about the +buffer size. Please note the @code{NULL} argument for the destination +buffer in the new @code{wcrtomb} call; since we are not interested in the +converted text at this point, this is a nice way to express this. The +most unusual thing about this piece of code certainly is the duplication +of the conversion state object, but if a change of the state is necessary +to emit the next multibyte character, we want to have the same shift state +change performed in the real conversion. Therefore, we have to preserve +the initial shift state information. + +There are certainly many more and even better solutions to this problem. +This example is only provided for educational purposes. + +@node Converting Strings +@subsection Converting Multibyte and Wide Character Strings + +The functions described in the previous section only convert a single +character at a time. Most operations to be performed in real-world +programs include strings and therefore the @w{ISO C} standard also +defines conversions on entire strings. However, the defined set of +functions is quite limited; therefore, @theglibc{} contains a few +extensions that can help in some important situations.
+ +@comment wchar.h +@comment ISO +@deftypefun size_t mbsrtowcs (wchar_t *restrict @var{dst}, const char **restrict @var{src}, size_t @var{len}, mbstate_t *restrict @var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:mbsrtowcs/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{mbsrtowcs} function (``multibyte string restartable to wide +character string'') converts the NUL-terminated multibyte character +string at @code{*@var{src}} into an equivalent wide character string, +including the NUL wide character at the end. The conversion is started +using the state information from the object pointed to by @var{ps} or +from an internal object of @code{mbsrtowcs} if @var{ps} is a null +pointer. Before returning, the state object is updated to match the state +after the last converted character. The state is the initial state if the +terminating NUL byte is reached and converted. + +If @var{dst} is not a null pointer, the result is stored in the array +pointed to by @var{dst}; otherwise, the conversion result is not +available since it is stored in an internal buffer. + +If @var{len} wide characters are stored in the array @var{dst} before +reaching the end of the input string, the conversion stops and @var{len} +is returned. If @var{dst} is a null pointer, @var{len} is never checked. + +Another reason for a premature return from the function call is if the +input string contains an invalid multibyte sequence. In this case the +global variable @code{errno} is set to @code{EILSEQ} and the function +returns @code{(size_t) -1}. + +@c XXX The ISO C9x draft seems to have a problem here. It says that PS +@c is not updated if DST is NULL. This is not said straightforward and +@c none of the other functions is described like this. It would make sense +@c to define the function this way but I don't think it is meant like this. 
+ +In all other cases the function returns the number of wide characters +converted during this call. If @var{dst} is not null, @code{mbsrtowcs} +stores in the pointer pointed to by @var{src} either a null pointer (if +the NUL byte in the input string was reached) or the address of the byte +following the last converted multibyte character. + +@pindex wchar.h +@code{mbsrtowcs} was introduced in @w{Amendment 1} to @w{ISO C90} and is +declared in @file{wchar.h}. +@end deftypefun + +The definition of the @code{mbsrtowcs} function has one important +limitation. The requirement that the input string at @code{*@var{src}} has to be +NUL-terminated causes problems if one wants to convert buffers with text. A +buffer is not normally a collection of NUL-terminated strings but instead a +continuous collection of lines, separated by newline characters. Now +assume that a function to convert one line from a buffer is needed. Since +the line is not NUL-terminated, the source pointer cannot directly point +into the unmodified text buffer. This means, either one inserts the NUL +byte at the appropriate place for the time of the @code{mbsrtowcs} +function call (which is not doable for a read-only buffer or in a +multi-threaded application) or one copies the line into an extra buffer +where it can be terminated by a NUL byte. Note that it is not in general +possible to limit the number of characters to convert by setting the +parameter @var{len} to any specific value. Since it is not known how +many bytes each multibyte character sequence is in length, one can only +guess. + +@cindex stateful +There is still a problem with the method of NUL-terminating a line right +after the newline character, which could lead to very strange results. +As said in the description of the @code{mbsrtowcs} function above, the +conversion state is guaranteed to be in the initial shift state after +processing the NUL byte at the end of the input string.
But this NUL +byte is not really part of the text (i.e., the conversion state after +the newline in the original text could be something different than the +initial shift state and therefore the first character of the next line +is encoded using this state). But the state in question is never +accessible to the user since the conversion stops after the NUL byte +(which resets the state). Most stateful character sets in use today +require that the shift state after a newline be the initial state--but +this is not a strict guarantee. Therefore, simply NUL-terminating a +piece of a running text is not always an adequate solution and, +therefore, should never be used in generally used code. + +The generic conversion interface (@pxref{Generic Charset Conversion}) +does not have this limitation (it simply works on buffers, not +strings), and @theglibc{} contains a set of functions that take +additional parameters specifying the maximal number of bytes that are +consumed from the input string. This way the problem of +@code{mbsrtowcs}'s example above could be solved by determining the line +length and passing this length to the function. + +@comment wchar.h +@comment ISO +@deftypefun size_t wcsrtombs (char *restrict @var{dst}, const wchar_t **restrict @var{src}, size_t @var{len}, mbstate_t *restrict @var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:wcsrtombs/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{wcsrtombs} function (``wide character string restartable to +multibyte string'') converts the NUL-terminated wide character string at +@code{*@var{src}} into an equivalent multibyte character string and +stores the result in the array pointed to by @var{dst}. The NUL wide +character is also converted. The conversion starts in the state +described in the object pointed to by @var{ps} or by a state object +local to @code{wcsrtombs} in case @var{ps} is a null pointer. 
If +@var{dst} is a null pointer, the conversion is performed as usual but the +result is not available. If all characters of the input string were +successfully converted and if @var{dst} is not a null pointer, the +pointer pointed to by @var{src} gets assigned a null pointer. + +If one of the wide characters in the input string has no valid multibyte +character equivalent, the conversion stops early, sets the global +variable @code{errno} to @code{EILSEQ}, and returns @code{(size_t) -1}. + +Another reason for a premature stop is if @var{dst} is not a null +pointer and the next converted character would require more than +@var{len} bytes in total to the array @var{dst}. In this case (and if +@var{dst} is not a null pointer) the pointer pointed to by @var{src} is +assigned a value pointing to the wide character right after the last one +successfully converted. + +Except in the case of an encoding error the return value of the +@code{wcsrtombs} function is the number of bytes in all the multibyte +character sequences stored in @var{dst}. Before returning, the state in +the object pointed to by @var{ps} (or the internal object in case +@var{ps} is a null pointer) is updated to reflect the state after the +last conversion. The state is the initial shift state in case the +terminating NUL wide character was converted. + +@pindex wchar.h +The @code{wcsrtombs} function was introduced in @w{Amendment 1} to +@w{ISO C90} and is declared in @file{wchar.h}. +@end deftypefun + +The restriction mentioned above for the @code{mbsrtowcs} function applies +here also. There is no possibility of directly controlling the number of +input characters. One has to place the NUL wide character at the correct +place or control the consumed input indirectly via the available output +array size (the @var{len} parameter). 
+ +@comment wchar.h +@comment GNU +@deftypefun size_t mbsnrtowcs (wchar_t *restrict @var{dst}, const char **restrict @var{src}, size_t @var{nmc}, size_t @var{len}, mbstate_t *restrict @var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:mbsnrtowcs/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{mbsnrtowcs} function is very similar to the @code{mbsrtowcs} +function. All the parameters are the same except for @var{nmc}, which is +new. The return value is the same as for @code{mbsrtowcs}. + +This new parameter specifies how many bytes at most can be used from the +multibyte character string. In other words, the multibyte character +string @code{*@var{src}} need not be NUL-terminated. But if a NUL byte +is found within the @var{nmc} first bytes of the string, the conversion +stops there. + +This function is a GNU extension. It is meant to work around the +problems mentioned above. Now it is possible to convert a buffer with +multibyte character text piece by piece without having to care about +inserting NUL bytes and the effect of NUL bytes on the conversion state. +@end deftypefun + +A function to convert a multibyte string into a wide character string +and display it could be written like this (this is not a really useful +example): + +@smallexample +void +showmbs (const char *src, FILE *fp) +@{ + mbstate_t state; + int cnt = 0; + memset (&state, '\0', sizeof (state)); + while (1) + @{ + wchar_t linebuf[100]; + const char *endp = strchr (src, '\n'); + size_t n; + + /* @r{Exit if there is no more line.} */ + if (endp == NULL) + break; + + n = mbsnrtowcs (linebuf, &src, endp - src, 99, &state); + linebuf[n] = L'\0'; + fprintf (fp, "line %d: \"%S\"\n", ++cnt, linebuf); + @} +@} +@end smallexample + +There is no problem with the state after a call to @code{mbsnrtowcs}.
+Since we don't insert characters in the strings that were not in there +right from the beginning and we use @var{state} only for the conversion +of the given buffer, there is no problem with altering the state. + +@comment wchar.h +@comment GNU +@deftypefun size_t wcsnrtombs (char *restrict @var{dst}, const wchar_t **restrict @var{src}, size_t @var{nwc}, size_t @var{len}, mbstate_t *restrict @var{ps}) +@safety{@prelim{}@mtunsafe{@mtasurace{:wcsnrtombs/!ps}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{wcsnrtombs} function implements the conversion from wide +character strings to multibyte character strings. It is similar to +@code{wcsrtombs} but, just like @code{mbsnrtowcs}, it takes an extra +parameter, which specifies the length of the input string. + +No more than @var{nwc} wide characters from the input string +@code{*@var{src}} are converted. If the input string contains a NUL +wide character in the first @var{nwc} characters, the conversion stops at +this place. + +The @code{wcsnrtombs} function is a GNU extension and just like +@code{mbsnrtowcs} helps in situations where no NUL-terminated input +strings are available. +@end deftypefun + + +@node Multibyte Conversion Example +@subsection A Complete Multibyte Conversion Example + +The example programs given in the last sections are only brief and do +not contain all the error checking, etc. Presented here is a complete +and documented example. It features the @code{mbrtowc} function but it +should be easy to derive versions using the other functions. 
+ +@smallexample +int +file_mbsrtowcs (int input, int output) +@{ + /* @r{Note the use of @code{MB_LEN_MAX}.} + @r{@code{MB_CUR_MAX} cannot portably be used here.} */ + char buffer[BUFSIZ + MB_LEN_MAX]; + mbstate_t state; + int filled = 0; + int eof = 0; + + /* @r{Initialize the state.} */ + memset (&state, '\0', sizeof (state)); + + while (!eof) + @{ + ssize_t nread; + ssize_t nwrite; + char *inp = buffer; + wchar_t outbuf[BUFSIZ]; + wchar_t *outp = outbuf; + + /* @r{Fill up the buffer from the input file.} */ + nread = read (input, buffer + filled, BUFSIZ); + if (nread < 0) + @{ + perror ("read"); + return 0; + @} + /* @r{If we reach end of file, make a note to read no more.} */ + if (nread == 0) + eof = 1; + + /* @r{@code{filled} is now the number of bytes in @code{buffer}.} */ + filled += nread; + + /* @r{Convert those bytes to wide characters--as many as we can.} */ + while (1) + @{ + size_t thislen = mbrtowc (outp, inp, filled, &state); + /* @r{Stop converting at invalid character;} + @r{this can mean we have read just the first part} + @r{of a valid character.} */ + if (thislen == (size_t) -1) + break; + /* @r{We want to handle embedded NUL bytes} + @r{but the return value is 0. 
Correct this.} */ + if (thislen == 0) + thislen = 1; + /* @r{Advance past this character.} */ + inp += thislen; + filled -= thislen; + ++outp; + @} + + /* @r{Write the wide characters we just made.} */ + nwrite = write (output, outbuf, + (outp - outbuf) * sizeof (wchar_t)); + if (nwrite < 0) + @{ + perror ("write"); + return 0; + @} + + /* @r{See if we have a @emph{real} invalid character.} */ + if ((eof && filled > 0) || filled >= MB_CUR_MAX) + @{ + error (0, 0, "invalid multibyte character"); + return 0; + @} + + /* @r{If any characters must be carried forward,} + @r{put them at the beginning of @code{buffer}.} */ + if (filled > 0) + memmove (buffer, inp, filled); + @} + + return 1; +@} +@end smallexample + + +@node Non-reentrant Conversion +@section Non-reentrant Conversion Function + +The functions described in the previous chapter are defined in +@w{Amendment 1} to @w{ISO C90}, but the original @w{ISO C90} standard +also contained functions for character set conversion. The reason that +these original functions are not described first is that they are almost +entirely useless. + +The problem is that all the conversion functions described in the +original @w{ISO C90} use a local state. Using a local state implies that +multiple conversions at the same time (not only when using threads) +cannot be done, and that you cannot first convert single characters and +then strings since you cannot tell the conversion functions which state +to use. + +These original functions are therefore usable only in a very limited set +of situations. One must complete converting the entire string before +starting a new one, and each string/text must be converted with the same +function (there is no problem with the library itself; it is guaranteed +that no library function changes the state of any of these functions). 
+@strong{For the above reasons it is highly requested that the functions +described in the previous section be used in place of non-reentrant +conversion functions.} + +@menu +* Non-reentrant Character Conversion:: Non-reentrant Conversion of Single + Characters. +* Non-reentrant String Conversion:: Non-reentrant Conversion of Strings. +* Shift State:: States in Non-reentrant Functions. +@end menu + +@node Non-reentrant Character Conversion +@subsection Non-reentrant Conversion of Single Characters + +@comment stdlib.h +@comment ISO +@deftypefun int mbtowc (wchar_t *restrict @var{result}, const char *restrict @var{string}, size_t @var{size}) +@safety{@prelim{}@mtunsafe{@mtasurace{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{mbtowc} (``multibyte to wide character'') function when called +with non-null @var{string} converts the first multibyte character +beginning at @var{string} to its corresponding wide character code. It +stores the result in @code{*@var{result}}. + +@code{mbtowc} never examines more than @var{size} bytes. (The idea is +to supply for @var{size} the number of bytes of data you have in hand.) + +@code{mbtowc} with non-null @var{string} distinguishes three +possibilities: the first @var{size} bytes at @var{string} start with +valid multibyte characters, they start with an invalid byte sequence or +just part of a character, or @var{string} points to an empty string (a +null character). + +For a valid multibyte character, @code{mbtowc} converts it to a wide +character and stores that in @code{*@var{result}}, and returns the +number of bytes in that character (always at least @math{1} and never +more than @var{size}). + +For an invalid byte sequence, @code{mbtowc} returns @math{-1}. For an +empty string, it returns @math{0}, also storing @code{'\0'} in +@code{*@var{result}}. 
+ +If the multibyte character code uses shift characters, then +@code{mbtowc} maintains and updates a shift state as it scans. If you +call @code{mbtowc} with a null pointer for @var{string}, that +initializes the shift state to its standard initial value. It also +returns nonzero if the multibyte character code in use actually has a +shift state. @xref{Shift State}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun int wctomb (char *@var{string}, wchar_t @var{wchar}) +@safety{@prelim{}@mtunsafe{@mtasurace{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{wctomb} (``wide character to multibyte'') function converts +the wide character code @var{wchar} to its corresponding multibyte +character sequence, and stores the result in bytes starting at +@var{string}. At most @code{MB_CUR_MAX} characters are stored. + +@code{wctomb} with non-null @var{string} distinguishes three +possibilities for @var{wchar}: a valid wide character code (one that can +be translated to a multibyte character), an invalid code, and +@code{L'\0'}. + +Given a valid code, @code{wctomb} converts it to a multibyte character, +storing the bytes starting at @var{string}. Then it returns the number +of bytes in that character (always at least @math{1} and never more +than @code{MB_CUR_MAX}). + +If @var{wchar} is an invalid wide character code, @code{wctomb} returns +@math{-1}. If @var{wchar} is @code{L'\0'}, it returns @code{0}, also +storing @code{'\0'} in @code{*@var{string}}. + +If the multibyte character code uses shift characters, then +@code{wctomb} maintains and updates a shift state as it scans. If you +call @code{wctomb} with a null pointer for @var{string}, that +initializes the shift state to its standard initial value. It also +returns nonzero if the multibyte character code in use actually has a +shift state. @xref{Shift State}. 
+ +Calling this function with a @var{wchar} argument of zero when +@var{string} is not null has the side-effect of reinitializing the +stored shift state @emph{as well as} storing the multibyte character +@code{'\0'} and returning @math{0}. +@end deftypefun + +Similar to @code{mbrlen} there is also a non-reentrant function that +computes the length of a multibyte character. It can be defined in +terms of @code{mbtowc}. + +@comment stdlib.h +@comment ISO +@deftypefun int mblen (const char *@var{string}, size_t @var{size}) +@safety{@prelim{}@mtunsafe{@mtasurace{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{mblen} function with a non-null @var{string} argument returns +the number of bytes that make up the multibyte character beginning at +@var{string}, never examining more than @var{size} bytes. (The idea is +to supply for @var{size} the number of bytes of data you have in hand.) + +The return value of @code{mblen} distinguishes three possibilities: the +first @var{size} bytes at @var{string} start with valid multibyte +characters, they start with an invalid byte sequence or just part of a +character, or @var{string} points to an empty string (a null character). + +For a valid multibyte character, @code{mblen} returns the number of +bytes in that character (always at least @code{1} and never more than +@var{size}). For an invalid byte sequence, @code{mblen} returns +@math{-1}. For an empty string, it returns @math{0}. + +If the multibyte character code uses shift characters, then @code{mblen} +maintains and updates a shift state as it scans. If you call +@code{mblen} with a null pointer for @var{string}, that initializes the +shift state to its standard initial value. It also returns a nonzero +value if the multibyte character code in use actually has a shift state. +@xref{Shift State}. + +@pindex stdlib.h +The function @code{mblen} is declared in @file{stdlib.h}. 
+@end deftypefun + + +@node Non-reentrant String Conversion +@subsection Non-reentrant Conversion of Strings + +For convenience the @w{ISO C90} standard also defines functions to +convert entire strings instead of single characters. These functions +suffer from the same problems as their reentrant counterparts from +@w{Amendment 1} to @w{ISO C90}; see @ref{Converting Strings}. + +@comment stdlib.h +@comment ISO +@deftypefun size_t mbstowcs (wchar_t *@var{wstring}, const char *@var{string}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c Odd... Although this was supposed to be non-reentrant, the internal +@c state is not a static buffer, but an automatic variable. +The @code{mbstowcs} (``multibyte string to wide character string'') +function converts the null-terminated string of multibyte characters +@var{string} to an array of wide character codes, storing not more than +@var{size} wide characters into the array beginning at @var{wstring}. +The terminating null character counts towards the size, so if @var{size} +is less than the actual number of wide characters resulting from +@var{string}, no terminating null character is stored. + +The conversion of characters from @var{string} begins in the initial +shift state. + +If an invalid multibyte character sequence is found, the @code{mbstowcs} +function returns a value of @math{-1}. Otherwise, it returns the number +of wide characters stored in the array @var{wstring}. This number does +not include the terminating null character, which is present if the +number is less than @var{size}. + +Here is an example showing how to convert a string of multibyte +characters, allocating enough space for the result. 
+ +@smallexample +wchar_t * +mbstowcs_alloc (const char *string) +@{ + size_t size = strlen (string) + 1; + wchar_t *buf = xmalloc (size * sizeof (wchar_t)); + + size = mbstowcs (buf, string, size); + if (size == (size_t) -1) + return NULL; + buf = xrealloc (buf, (size + 1) * sizeof (wchar_t)); + return buf; +@} +@end smallexample + +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun size_t wcstombs (char *@var{string}, const wchar_t *@var{wstring}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +The @code{wcstombs} (``wide character string to multibyte string'') +function converts the null-terminated wide character array @var{wstring} +into a string containing multibyte characters, storing not more than +@var{size} bytes starting at @var{string}, followed by a terminating +null character if there is room. The conversion of characters begins in +the initial shift state. + +The terminating null character counts towards the size, so if @var{size} +is less than or equal to the number of bytes needed in @var{wstring}, no +terminating null character is stored. + +If a code that does not correspond to a valid multibyte character is +found, the @code{wcstombs} function returns a value of @math{-1}. +Otherwise, the return value is the number of bytes stored in the array +@var{string}. This number does not include the terminating null character, +which is present if the number is less than @var{size}. +@end deftypefun + +@node Shift State +@subsection States in Non-reentrant Functions + +In some multibyte character codes, the @emph{meaning} of any particular +byte sequence is not fixed; it depends on what other sequences have come +earlier in the same string. 
Typically there are just a few sequences that +can change the meaning of other sequences; these few are called +@dfn{shift sequences} and we say that they set the @dfn{shift state} for +other sequences that follow. + +To illustrate shift state and shift sequences, suppose we decide that +the sequence @code{0200} (just one byte) enters Japanese mode, in which +pairs of bytes in the range from @code{0240} to @code{0377} are single +characters, while @code{0201} enters Latin-1 mode, in which single bytes +in the range from @code{0240} to @code{0377} are characters, and +interpreted according to the ISO Latin-1 character set. This is a +multibyte code that has two alternative shift states (``Japanese mode'' +and ``Latin-1 mode''), and two shift sequences that specify particular +shift states. + +When the multibyte character code in use has shift states, then +@code{mblen}, @code{mbtowc}, and @code{wctomb} must maintain and update +the current shift state as they scan the string. To make this work +properly, you must follow these rules: + +@itemize @bullet +@item +Before starting to scan a string, call the function with a null pointer +for the multibyte character address---for example, @code{mblen (NULL, +0)}. This initializes the shift state to its standard initial value. + +@item +Scan the string one character at a time, in order. Do not ``back up'' +and rescan characters already scanned, and do not intersperse the +processing of different strings. 
+@end itemize + +Here is an example of using @code{mblen} following these rules: + +@smallexample +void +scan_string (char *s) +@{ + int length = strlen (s); + + /* @r{Initialize shift state.} */ + mblen (NULL, 0); + + while (1) + @{ + int thischar = mblen (s, length); + /* @r{Deal with end of string and invalid characters.} */ + if (thischar == 0) + break; + if (thischar == -1) + @{ + error ("invalid multibyte character"); + break; + @} + /* @r{Advance past this character.} */ + s += thischar; + length -= thischar; + @} +@} +@end smallexample + +The functions @code{mblen}, @code{mbtowc} and @code{wctomb} are not +reentrant when using a multibyte code that uses a shift state. However, +no other library functions call these functions, so you don't have to +worry that the shift state will be changed mysteriously. + + +@node Generic Charset Conversion +@section Generic Charset Conversion + +The conversion functions mentioned so far in this chapter all had in +common that they operate on character sets that are not directly +specified by the functions. The multibyte encoding used is specified by +the currently selected locale for the @code{LC_CTYPE} category. The +wide character set is fixed by the implementation (in the case of @theglibc{} +it is always UCS-4 encoded @w{ISO 10646}). + +This has of course several problems when it comes to general character +conversion: + +@itemize @bullet +@item +For every conversion where neither the source nor the destination +character set is the character set of the locale for the @code{LC_CTYPE} +category, one has to change the @code{LC_CTYPE} locale using +@code{setlocale}. + +Changing the @code{LC_CTYPE} locale introduces major problems for the rest +of the programs since several more functions (e.g., the character +classification functions, @pxref{Classification of Characters}) use the +@code{LC_CTYPE} category. 
+ +@item +Parallel conversions to and from different character sets are not +possible since the @code{LC_CTYPE} selection is global and shared by all +threads. + +@item +If neither the source nor the destination character set is the character +set used for @code{wchar_t} representation, there is at least a two-step +process necessary to convert a text using the functions above. One would +have to select the source character set as the multibyte encoding, +convert the text into a @code{wchar_t} text, select the destination +character set as the multibyte encoding, and convert the wide character +text to the multibyte (@math{=} destination) character set. + +Even if this is possible (which is not guaranteed) it is a very tiring +work. Plus it suffers from the other two raised points even more due to +the steady changing of the locale. +@end itemize + +The XPG2 standard defines a completely new set of functions, which has +none of these limitations. They are not at all coupled to the selected +locales, and they have no constraints on the character sets selected for +source and destination. Only the set of available conversions limits +them. The standard does not specify that any conversion at all must be +available. Such availability is a measure of the quality of the +implementation. + +In the following text first the interface to @code{iconv} and then the +conversion function, will be described. Comparisons with other +implementations will show what obstacles stand in the way of portable +applications. Finally, the implementation is described in so far as might +interest the advanced user who wants to extend conversion capabilities. + +@menu +* Generic Conversion Interface:: Generic Character Set Conversion Interface. +* iconv Examples:: A complete @code{iconv} example. +* Other iconv Implementations:: Some Details about other @code{iconv} + Implementations. +* glibc iconv Implementation:: The @code{iconv} Implementation in the GNU C + library. 
+@end menu + +@node Generic Conversion Interface +@subsection Generic Character Set Conversion Interface + +This set of functions follows the traditional cycle of using a resource: +open--use--close. The interface consists of three functions, each of +which implements one step. + +Before the interfaces are described it is necessary to introduce a +data type. Just like other open--use--close interfaces the functions +introduced here work using handles and the @file{iconv.h} header +defines a special type for the handles used. + +@comment iconv.h +@comment XPG2 +@deftp {Data Type} iconv_t +This data type is an abstract type defined in @file{iconv.h}. The user +must not assume anything about the definition of this type; it must be +completely opaque. + +Objects of this type can be assigned handles for the conversions using +the @code{iconv} functions. The objects themselves need not be freed, but +the conversions for which the handles stand for have to. +@end deftp + +@noindent +The first step is the function to create a handle. + +@comment iconv.h +@comment XPG2 +@deftypefun iconv_t iconv_open (const char *@var{tocode}, const char *@var{fromcode}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c Calls malloc if tocode and/or fromcode are too big for alloca. Calls +@c strip and upstr on both, then gconv_open. strip and upstr call +@c isalnum_l and toupper_l with the C locale. gconv_open may MT-safely +@c tokenize toset, replace unspecified codesets with the current locale +@c (possibly two different accesses), and finally it calls +@c gconv_find_transform and initializes the gconv_t result with all the +@c steps in the conversion sequence, running each one's initializer, +@c destructing and releasing them all if anything fails. + +The @code{iconv_open} function has to be used before starting a +conversion. 
The two parameters this function takes determine the +source and destination character set for the conversion, and if the +implementation has the possibility to perform such a conversion, the +function returns a handle. + +If the wanted conversion is not available, the @code{iconv_open} function +returns @code{(iconv_t) -1}. In this case the global variable +@code{errno} can have the following values: + +@table @code +@item EMFILE +The process already has @code{OPEN_MAX} file descriptors open. +@item ENFILE +The system limit of open files is reached. +@item ENOMEM +Not enough memory to carry out the operation. +@item EINVAL +The conversion from @var{fromcode} to @var{tocode} is not supported. +@end table + +It is not possible to use the same descriptor in different threads to +perform independent conversions. The data structures associated +with the descriptor include information about the conversion state. +This must not be messed up by using it in different conversions. + +An @code{iconv} descriptor is like a file descriptor in that a +new descriptor must be created for every use. The descriptor does not stand for all +of the conversions from @var{fromcode} to @var{tocode}. + +The @glibcadj{} implementation of @code{iconv_open} has one +significant extension to other implementations. To ease the extension +of the set of available conversions, the implementation allows storing +the necessary files with data and code in an arbitrary number of +directories. How this extension must be written will be explained below +(@pxref{glibc iconv Implementation}). Here it is only important to say +that all directories mentioned in the @code{GCONV_PATH} environment +variable are considered only if they contain a file @file{gconv-modules}. +These directories need not necessarily be created by the system +administrator. In fact, this extension is introduced to help users +writing and using their own, new conversions.
Of course, this does not +work for security reasons in SUID binaries; in this case only the system +directory is considered and this normally is +@file{@var{prefix}/lib/gconv}. The @code{GCONV_PATH} environment +variable is examined exactly once at the first call of the +@code{iconv_open} function. Later modifications of the variable have no +effect. + +@pindex iconv.h +The @code{iconv_open} function was introduced early in the X/Open +Portability Guide, @w{version 2}. It is supported by all commercial +Unices as it is required for the Unix branding. However, the quality and +completeness of the implementation varies widely. The @code{iconv_open} +function is declared in @file{iconv.h}. +@end deftypefun + +The @code{iconv} implementation can associate large data structure with +the handle returned by @code{iconv_open}. Therefore, it is crucial to +free all the resources once all conversions are carried out and the +conversion is not needed anymore. + +@comment iconv.h +@comment XPG2 +@deftypefun int iconv_close (iconv_t @var{cd}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Calls gconv_close to destruct and release each of the conversion +@c steps, release the gconv_t object, then call gconv_close_transform. +@c Access to the gconv_t object is not guarded, but calling iconv_close +@c concurrently with any other use is undefined. + +The @code{iconv_close} function frees all resources associated with the +handle @var{cd}, which must have been returned by a successful call to +the @code{iconv_open} function. + +If the function call was successful the return value is @math{0}. +Otherwise it is @math{-1} and @code{errno} is set appropriately. +Defined errors are: + +@table @code +@item EBADF +The conversion descriptor is invalid. 
+@end table + +@pindex iconv.h +The @code{iconv_close} function was introduced together with the rest +of the @code{iconv} functions in XPG2 and is declared in @file{iconv.h}. +@end deftypefun + +The standard defines only one actual conversion function. This has, +therefore, the most general interface: it allows conversion from one +buffer to another. Conversion from a file to a buffer, vice versa, or +even file to file can be implemented on top of it. + +@comment iconv.h +@comment XPG2 +@deftypefun size_t iconv (iconv_t @var{cd}, char **@var{inbuf}, size_t *@var{inbytesleft}, char **@var{outbuf}, size_t *@var{outbytesleft}) +@safety{@prelim{}@mtsafe{@mtsrace{:cd}}@assafe{}@acunsafe{@acucorrupt{}}} +@c Without guarding access to the iconv_t object pointed to by cd, call +@c the conversion function to convert inbuf or flush the internal +@c conversion state. +@cindex stateful +The @code{iconv} function converts the text in the input buffer +according to the rules associated with the descriptor @var{cd} and +stores the result in the output buffer. It is possible to call the +function for the same text several times in a row since for stateful +character sets the necessary state information is kept in the data +structures associated with the descriptor. + +The input buffer is specified by @code{*@var{inbuf}} and it contains +@code{*@var{inbytesleft}} bytes. The extra indirection is necessary for +communicating the used input back to the caller (see below). It is +important to note that the buffer pointer is of type @code{char} and the +length is measured in bytes even if the input text is encoded in wide +characters. + +The output buffer is specified in a similar way. @code{*@var{outbuf}} +points to the beginning of the buffer with at least +@code{*@var{outbytesleft}} bytes room for the result. The buffer +pointer again is of type @code{char} and the length is measured in +bytes. 
If @var{outbuf} or @code{*@var{outbuf}} is a null pointer, the +conversion is performed but no output is available. + +If @var{inbuf} is a null pointer, the @code{iconv} function performs the +necessary action to put the state of the conversion into the initial +state. This is obviously a no-op for non-stateful encodings, but if the +encoding has a state, such a function call might put some byte sequences +in the output buffer, which perform the necessary state changes. The +next call with @var{inbuf} not being a null pointer then simply goes on +from the initial state. It is important that the programmer never makes +any assumption as to whether the conversion has to deal with states. +Even if the input and output character sets are not stateful, the +implementation might still have to keep states. This is due to the +implementation chosen for @theglibc{} as it is described below. +Therefore an @code{iconv} call to reset the state should always be +performed if some protocol requires this for the output text. + +The conversion stops for one of three reasons. The first is that all +characters from the input buffer are converted. This actually can mean +two things: either all bytes from the input buffer are consumed or +there are some bytes at the end of the buffer that possibly can form a +complete character but the input is incomplete. The second reason for a +stop is that the output buffer is full. And the third reason is that +the input contains invalid characters. + +In all of these cases the buffer pointers after the last successful +conversion, for the input and output buffers, are stored in @var{inbuf} and +@var{outbuf}, and the available room in each buffer is stored in +@var{inbytesleft} and @var{outbytesleft}. + +Since the character sets selected in the @code{iconv_open} call can be +almost arbitrary, there can be situations where the input buffer contains +valid characters, which have no identical representation in the output +character set. 
The behavior in this situation is undefined. The +@emph{current} behavior of @theglibc{} in this situation is to +return with an error immediately. This certainly is not the most +desirable solution; therefore, future versions will provide better ones, +but they are not yet finished. + +If all input from the input buffer is successfully converted and stored +in the output buffer, the function returns the number of non-reversible +conversions performed. In all other cases the return value is +@code{(size_t) -1} and @code{errno} is set appropriately. In such cases +the value pointed to by @var{inbytesleft} is nonzero. + +@table @code +@item EILSEQ +The conversion stopped because of an invalid byte sequence in the input. +After the call, @code{*@var{inbuf}} points at the first byte of the +invalid byte sequence. + +@item E2BIG +The conversion stopped because it ran out of space in the output buffer. + +@item EINVAL +The conversion stopped because of an incomplete byte sequence at the end +of the input buffer. + +@item EBADF +The @var{cd} argument is invalid. +@end table + +@pindex iconv.h +The @code{iconv} function was introduced in the XPG2 standard and is +declared in the @file{iconv.h} header. +@end deftypefun + +The definition of the @code{iconv} function is quite good overall. It +provides quite flexible functionality. The only problems lie in the +boundary cases, which are incomplete byte sequences at the end of the +input buffer and invalid input. A third problem, which is not really +a design problem, is the way conversions are selected. The standard +does not say anything about the legitimate names, a minimal set of +available conversions. We will see how this negatively impacts other +implementations, as demonstrated below. + +@node iconv Examples +@subsection A complete @code{iconv} example + +The example below features a solution for a common problem. 
Given that +one knows the internal encoding used by the system for @code{wchar_t} +strings, one often is in the position to read text from a file and store +it in wide character buffers. One can do this using @code{mbsrtowcs}, +but then we run into the problems discussed above. + +@smallexample +int +file2wcs (int fd, const char *charset, wchar_t *outbuf, size_t avail) +@{ + char inbuf[BUFSIZ]; + size_t insize = 0; + char *wrptr = (char *) outbuf; + int result = 0; + iconv_t cd; + + cd = iconv_open ("WCHAR_T", charset); + if (cd == (iconv_t) -1) + @{ + /* @r{Something went wrong.} */ + if (errno == EINVAL) + error (0, 0, "conversion from '%s' to wchar_t not available", + charset); + else + perror ("iconv_open"); + + /* @r{Terminate the output string.} */ + *outbuf = L'\0'; + + return -1; + @} + + while (avail > 0) + @{ + size_t nread; + size_t nconv; + char *inptr = inbuf; + + /* @r{Read more input.} */ + nread = read (fd, inbuf + insize, sizeof (inbuf) - insize); + if (nread == 0) + @{ + /* @r{When we come here the file is completely read.} + @r{This still could mean there are some unused} + @r{characters in the @code{inbuf}. Put them back.} */ + if (lseek (fd, -insize, SEEK_CUR) == -1) + result = -1; + + /* @r{Now write out the byte sequence to get into the} + @r{initial state if this is necessary.} */ + iconv (cd, NULL, NULL, &wrptr, &avail); + + break; + @} + insize += nread; + + /* @r{Do the conversion.} */ + nconv = iconv (cd, &inptr, &insize, &wrptr, &avail); + if (nconv == (size_t) -1) + @{ + /* @r{Not everything went right. It might only be} + @r{an unfinished byte sequence at the end of the} + @r{buffer. Or it is a real problem.} */ + if (errno == EINVAL) + /* @r{This is harmless. Simply move the unused} + @r{bytes to the beginning of the buffer so that} + @r{they can be used in the next round.} */ + memmove (inbuf, inptr, insize); + else + @{ + /* @r{It is a real problem. 
Maybe we ran out of} + @r{space in the output buffer or we have invalid} + @r{input. In any case back the file pointer to} + @r{the position of the last processed byte.} */ + lseek (fd, -insize, SEEK_CUR); + result = -1; + break; + @} + @} + @} + + /* @r{Terminate the output string.} */ + if (avail >= sizeof (wchar_t)) + *((wchar_t *) wrptr) = L'\0'; + + if (iconv_close (cd) != 0) + perror ("iconv_close"); + + return (wchar_t *) wrptr - outbuf; +@} +@end smallexample + +@cindex stateful +This example shows the most important aspects of using the @code{iconv} +functions. It shows how successive calls to @code{iconv} can be used to +convert large amounts of text. The user does not have to care about +stateful encodings as the functions take care of everything. + +An interesting point is the case where @code{iconv} returns an error and +@code{errno} is set to @code{EINVAL}. This is not really an error in the +transformation. It can happen whenever the input character set contains +byte sequences of more than one byte for some character and texts are not +processed in one piece. In this case there is a chance that a multibyte +sequence is cut. The caller can then simply read the remainder of the +text and feed the offending bytes together with new characters from the +input to @code{iconv} and continue the work. Unlike with the conversion +functions from the @w{ISO C} standard, the internal state kept in the +descriptor is @emph{not} left unspecified after such an event. + +The example also shows the problem of using wide character strings with +@code{iconv}. As explained in the description of the @code{iconv} +function above, the function always takes a pointer to a @code{char} +array and the available space is measured in bytes. In the example, the +output buffer is a wide character buffer; therefore, we use a local +variable @var{wrptr} of type @code{char *}, which is used in the +@code{iconv} calls.
+ +This looks rather innocent but can lead to problems on platforms that +have tight restrictions on alignment. Therefore the caller of @code{iconv} +has to make sure that the pointers passed are suitable for access of +characters from the appropriate character set. Since, in the +above case, the input parameter to the function is a @code{wchar_t} +pointer, this is the case (unless the user violates alignment when +computing the parameter). But in other situations, especially when +writing generic functions where one does not know what type of character +set one uses and, therefore, treats text as a sequence of bytes, it might +become tricky. + +@node Other iconv Implementations +@subsection Some Details about other @code{iconv} Implementations + +This is not really the place to discuss the @code{iconv} implementation +of other systems but it is necessary to know a bit about them to write +portable programs. The above mentioned problems with the specification +of the @code{iconv} functions can lead to portability issues. + +The first thing to notice is that, due to the large number of character +sets in use, it is certainly not practical to encode the conversions +directly in the C library. Therefore, the conversion information must +come from files outside the C library. This is usually done in one or +both of the following ways: + +@itemize @bullet +@item +The C library contains a set of generic conversion functions that can +read the needed conversion tables and other information from data files. +These files get loaded when necessary. + +This solution is problematic as it requires a great deal of effort to +apply to all character sets (potentially an infinite set). The +differences in the structure of the different character sets are so large +that many different variants of the table-processing functions must be +developed. In addition, the generic nature of these functions makes them +slower than specifically implemented functions.
+ +@item +The C library only contains a framework that can dynamically load +object files and execute the conversion functions contained therein. + +This solution provides much more flexibility. The C library itself +contains only very little code and therefore reduces the general memory +footprint. Also, with a documented interface between the C library and +the loadable modules it is possible for third parties to extend the set +of available conversion modules. A drawback of this solution is that +dynamic loading must be available. +@end itemize + +Some implementations in commercial Unices implement a mixture of these +possibilities; the majority implement only the second solution. Using +loadable modules moves the code out of the library itself and keeps +the door open for extensions and improvements, but this design is also +limiting on some platforms since not many platforms support dynamic +loading in statically linked programs. On platforms without this +capability it is therefore not possible to use this interface in +statically linked programs. @Theglibc{} has, on ELF platforms, no +problems with dynamic loading in these situations; therefore, this +point is moot. The danger is that one gets acquainted with this +situation and forgets about the restrictions on other systems. + +A second thing to know about other @code{iconv} implementations is that +the number of available conversions is often very limited. Some +implementations provide, in the standard release (not special +international or developer releases), at most 100 to 200 conversion +possibilities. This does not mean 200 different character sets are +supported; for example, conversions from one character set to a set of 10 +others might count as 10 conversions. Together with the other direction +this makes 20 conversion possibilities used up by one character set. One +can imagine the thin coverage these platforms provide. 
Some Unix vendors +even provide only a handful of conversions, which renders them useless for +almost all uses. + +This directly leads to a third and probably the most problematic point. +The way the @code{iconv} conversion functions are implemented on all +known Unix systems and the availability of the conversion functions from +character set @math{@cal{A}} to @math{@cal{B}} and the conversion from +@math{@cal{B}} to @math{@cal{C}} does @emph{not} imply that the +conversion from @math{@cal{A}} to @math{@cal{C}} is available. + +This might not seem unreasonable and problematic at first, but it is a +quite big problem as one will notice shortly after hitting it. To show +the problem we assume to write a program that has to convert from +@math{@cal{A}} to @math{@cal{C}}. A call like + +@smallexample +cd = iconv_open ("@math{@cal{C}}", "@math{@cal{A}}"); +@end smallexample + +@noindent +fails according to the assumption above. But what does the program +do now? The conversion is necessary; therefore, simply giving up is not +an option. + +This is a nuisance. The @code{iconv} function should take care of this. +But how should the program proceed from here on? If it tries to convert +to character set @math{@cal{B}}, first the two @code{iconv_open} +calls + +@smallexample +cd1 = iconv_open ("@math{@cal{B}}", "@math{@cal{A}}"); +@end smallexample + +@noindent +and + +@smallexample +cd2 = iconv_open ("@math{@cal{C}}", "@math{@cal{B}}"); +@end smallexample + +@noindent +will succeed, but how to find @math{@cal{B}}? + +Unfortunately, the answer is: there is no general solution. On some +systems guessing might help. On those systems most character sets can +convert to and from UTF-8 encoded @w{ISO 10646} or Unicode text. Besides +this only some very system-specific methods can help. 
Since the +conversion functions come from loadable modules and these modules must +be stored somewhere in the filesystem, one @emph{could} try to find them +and determine from the available file which conversions are available +and whether there is an indirect route from @math{@cal{A}} to +@math{@cal{C}}. + +This example shows one of the design errors of @code{iconv} mentioned +above. It should at least be possible to determine the list of available +conversions programmatically so that if @code{iconv_open} says there is no +such conversion, one could make sure this also is true for indirect +routes. + +@node glibc iconv Implementation +@subsection The @code{iconv} Implementation in @theglibc{} + +After reading about the problems of @code{iconv} implementations in the +last section it is certainly good to note that the implementation in +@theglibc{} has none of the problems mentioned above. What +follows is a step-by-step analysis of the points raised above. The +evaluation is based on the current state of the development (as of +January 1999). The development of the @code{iconv} functions is not +complete, but basic functionality has solidified. + +@Theglibc{}'s @code{iconv} implementation uses shared loadable +modules to implement the conversions. A very small number of +conversions are built into the library itself but these are only rather +trivial conversions. + +All the benefits of loadable modules are available in the @glibcadj{} +implementation. This is especially appealing since the interface is +well documented (see below), and it, therefore, is easy to write new +conversion modules. The drawback of using loadable objects is not a +problem in @theglibc{}, at least on ELF systems. Since the +library is able to load shared objects even in statically linked +binaries, static linking need not be forbidden in case one wants to use +@code{iconv}. + +The second mentioned problem is the number of supported conversions. 
+Currently, @theglibc{} supports more than 150 character sets. The +way the implementation is designed the number of supported conversions +is greater than 22350 (@math{150} times @math{149}). If any conversion +from or to a character set is missing, it can be added easily. + +Particularly impressive as it may be, this high number is due to the +fact that the @glibcadj{} implementation of @code{iconv} does not have +the third problem mentioned above (i.e., whenever there is a conversion +from a character set @math{@cal{A}} to @math{@cal{B}} and from +@math{@cal{B}} to @math{@cal{C}} it is always possible to convert from +@math{@cal{A}} to @math{@cal{C}} directly). If the @code{iconv_open} +returns an error and sets @code{errno} to @code{EINVAL}, there is no +known way, directly or indirectly, to perform the wanted conversion. + +@cindex triangulation +Triangulation is achieved by providing for each character set a +conversion from and to UCS-4 encoded @w{ISO 10646}. Using @w{ISO 10646} +as an intermediate representation it is possible to @dfn{triangulate} +(i.e., convert with an intermediate representation). + +There is no inherent requirement to provide a conversion to @w{ISO +10646} for a new character set, and it is also possible to provide other +conversions where neither source nor destination character set is @w{ISO +10646}. The existing set of conversions is simply meant to cover all +conversions that might be of interest. + +@cindex ISO-2022-JP +@cindex EUC-JP +All currently available conversions use the triangulation method above, +making conversion run unnecessarily slow. If, for example, somebody +often needs the conversion from ISO-2022-JP to EUC-JP, a quicker solution +would involve direct conversion between the two character sets, skipping +the input to @w{ISO 10646} first. The two character sets of interest +are much more similar to each other than to @w{ISO 10646}. 
+ +In such a situation one easily can write a new conversion and provide it +as a better alternative. The @glibcadj{} @code{iconv} implementation +would automatically use the module implementing the conversion if it is +specified to be more efficient. + +@subsubsection Format of @file{gconv-modules} files + +All information about the available conversions comes from a file named +@file{gconv-modules}, which can be found in any of the directories along +the @code{GCONV_PATH}. The @file{gconv-modules} files are line-oriented +text files, where each of the lines has one of the following formats: + +@itemize @bullet +@item +If the first non-whitespace character is a @kbd{#} the line contains only +comments and is ignored. + +@item +Lines starting with @code{alias} define an alias name for a character +set. Two more words are expected on the line. The first word +defines the alias name, and the second defines the original name of the +character set. The effect is that it is possible to use the alias name +in the @var{fromcode} or @var{tocode} parameters of @code{iconv_open} and +achieve the same result as when using the real character set name. + +This is quite important as a character set often has many different +names. There is normally an official name but this need not correspond to +the most popular name. Besides this many character sets have special +names that are somehow constructed. For example, all character sets +specified by the ISO have an alias of the form @code{ISO-IR-@var{nnn}} +where @var{nnn} is the registration number. This allows programs that +know about the registration number to construct character set names and +use them in @code{iconv_open} calls. More on the available names and +aliases follows below. + +@item +Lines starting with @code{module} introduce an available conversion +module. These lines must contain three or four more words.
+ +The first word specifies the source character set, the second word the +destination character set of the conversion implemented in this module, and +the third word is the name of the loadable module. The filename is +constructed by appending the usual shared object suffix (normally +@file{.so}) and this file is then supposed to be found in the same +directory the @file{gconv-modules} file is in. The last word on the line, +which is optional, is a numeric value representing the cost of the +conversion. If this word is missing, a cost of @math{1} is assumed. The +numeric value itself does not matter that much; what counts are the +relative values of the sums of costs for all possible conversion paths. +Below is a more precise description of the use of the cost value. +@end itemize + +Let us return to the example above, where one has written a module to directly +convert from ISO-2022-JP to EUC-JP and back. All that has to be done is +to put the new module, let its name be ISO2022JP-EUCJP.so, in a directory +and add a file @file{gconv-modules} with the following content in the +same directory: + +@smallexample +module ISO-2022-JP// EUC-JP// ISO2022JP-EUCJP 1 +module EUC-JP// ISO-2022-JP// ISO2022JP-EUCJP 1 +@end smallexample + +To see why this is sufficient, it is necessary to understand how the +conversion used by @code{iconv} (and described in the descriptor) is +selected. The approach to this problem is quite simple. + +At the first call of the @code{iconv_open} function the program reads +all available @file{gconv-modules} files and builds up two tables: one +containing all the known aliases and another that contains the +information about the conversions and which shared object implements +them. + +@subsubsection Finding the conversion path in @code{iconv} + +The set of available conversions forms a directed graph with weighted +edges. The weights on the edges are the costs specified in the +@file{gconv-modules} files.
The @code{iconv_open} function uses an +algorithm suitable for searching for the best path in such a graph and so +constructs a list of conversions that must be performed in succession +to get the transformation from the source to the destination character +set. + +Explaining why the above @file{gconv-modules} file allows the +@code{iconv} implementation to resolve the specific ISO-2022-JP to +EUC-JP conversion module instead of the conversion coming with the +library itself is straightforward. Since the latter conversion takes two +steps (from ISO-2022-JP to @w{ISO 10646} and then from @w{ISO 10646} to +EUC-JP), the cost is @math{1+1 = 2}. The above @file{gconv-modules} +file, however, specifies that the new conversion modules can perform this +conversion with only the cost of @math{1}. + +A mysterious item about the @file{gconv-modules} file above (and also +the file coming with @theglibc{}) is the names of the character +sets specified in the @code{module} lines. Why do almost all the names +end in @code{//}? And this is not all: the names can actually be +regular expressions. At this point in time this mystery should not be +revealed, unless you have the relevant spell-casting materials: ashes +from an original @w{DOS 6.2} boot disk burnt in effigy, a crucifix +blessed by St.@: Emacs, assorted herbal roots from Central America, sand +from Cebu, etc. Sorry! @strong{The part of the implementation where +this is used is not yet finished. For now please simply follow the +existing examples. It'll become clearer once it is. --drepper} + +A last remark about the @file{gconv-modules} file is about the names not +ending with @code{//}. A character set named @code{INTERNAL} is often +mentioned. From the discussion above and the chosen name it should have +become clear that this is the name for the representation used in the +intermediate step of the triangulation. We have said that this is UCS-4 +but actually that is not quite right.
The UCS-4 specification also +includes the specification of the byte ordering used. Since a UCS-4 value +consists of four bytes, a stored value is affected by byte ordering. The +internal representation is @emph{not} the same as UCS-4 in case the byte +ordering of the processor (or at least the running process) is not the +same as the one required for UCS-4. This is done for performance reasons +as one does not want to perform unnecessary byte-swapping operations if +one is not interested in actually seeing the result in UCS-4. To avoid +trouble with endianness, the internal representation consistently is named +@code{INTERNAL} even on big-endian systems where the representations are +identical. + +@subsubsection @code{iconv} module data structures + +So far this section has described how modules are located and considered +to be used. What remains to be described is the interface of the modules +so that one can write new ones. This section describes the interface as +it is in use in January 1999. The interface will change a bit in the +future but, with luck, only in an upwardly compatible way. + +The definitions necessary to write new modules are publicly available +in the non-standard header @file{gconv.h}. The following text, +therefore, describes the definitions from this header file. First, +however, it is necessary to get an overview. + +From the perspective of the user of @code{iconv} the interface is quite +simple: the @code{iconv_open} function returns a handle that can be used +in calls to @code{iconv}, and finally the handle is freed with a call to +@code{iconv_close}. The problem is that the handle has to be able to +represent the possibly long sequences of conversion steps and also the +state of each conversion since the handle is all that is passed to the +@code{iconv} function. Therefore, the data structures are really the +elements necessary to understanding the implementation. + +We need two different kinds of data structures. 
The first describes the +conversion and the second describes the state etc. There are really two +type definitions like this in @file{gconv.h}. +@pindex gconv.h + +@comment gconv.h +@comment GNU +@deftp {Data type} {struct __gconv_step} +This data structure describes one conversion a module can perform. For +each function in a loaded module with conversion functions there is +exactly one object of this type. This object is shared by all users of +the conversion (i.e., this object does not contain any information +corresponding to an actual conversion; it only describes the conversion +itself). + +@table @code +@item struct __gconv_loaded_object *__shlib_handle +@itemx const char *__modname +@itemx int __counter +All these elements of the structure are used internally in the C library +to coordinate loading and unloading the shared object. One must not expect any +of the other elements to be available or initialized. + +@item const char *__from_name +@itemx const char *__to_name +@code{__from_name} and @code{__to_name} contain the names of the source and +destination character sets. They can be used to identify the actual +conversion to be carried out since one module might implement conversions +for more than one character set and/or direction. + +@item gconv_fct __fct +@itemx gconv_init_fct __init_fct +@itemx gconv_end_fct __end_fct +These elements contain pointers to the functions in the loadable module. +The interface will be explained below. + +@item int __min_needed_from +@itemx int __max_needed_from +@itemx int __min_needed_to +@itemx int __max_needed_to +These values have to be supplied in the init function of the module. The +@code{__min_needed_from} value specifies the minimum number of bytes +needed by a character of the source character set. The @code{__max_needed_from} +specifies the maximum value that also includes possible shift sequences.
+ +The @code{__min_needed_to} and @code{__max_needed_to} values serve the +same purpose as @code{__min_needed_from} and @code{__max_needed_from} but +this time for the destination character set. + +It is crucial that these values be accurate since otherwise the +conversion functions will have problems or not work at all. + +@item int __stateful +This element must also be initialized by the init function. +@code{int __stateful} is nonzero if the source character set is stateful. +Otherwise it is zero. + +@item void *__data +This element can be used freely by the conversion functions in the +module. @code{void *__data} can be used to communicate extra information +from one call to another. @code{void *__data} need not be initialized if +not needed at all. If the @code{void *__data} element is assigned a pointer +to dynamically allocated memory (presumably in the init function), the +end function must deallocate the memory. Otherwise +the application will leak memory. + +It is important to be aware that this data structure is shared by all +users of this specific conversion and therefore the @code{__data} +element must not contain data specific to one particular use of the +conversion function. +@end table +@end deftp + +@comment gconv.h +@comment GNU +@deftp {Data type} {struct __gconv_step_data} +This is the data structure that contains the information specific to +each use of the conversion functions. + + +@table @code +@item char *__outbuf +@itemx char *__outbufend +These elements specify the output buffer for the conversion step. The +@code{__outbuf} element points to the beginning of the buffer, and +@code{__outbufend} points to the byte following the last byte in the +buffer. The conversion function must not assume anything about the size +of the buffer but it can be safely assumed there is room for at +least one complete character in the output buffer.
+ +Once the conversion is finished, if the conversion is the last step, the +@code{__outbuf} element must be modified to point after the last byte +written into the buffer to signal how much output is available. If this +conversion step is not the last one, the element must not be modified. +The @code{__outbufend} element must not be modified. + +@item int __is_last +This element is nonzero if this conversion step is the last one. This +information is necessary for the recursion. See the description of the +conversion function internals below. This element must never be +modified. + +@item int __invocation_counter +The conversion function can use this element to see how many calls of +the conversion function already happened. Some character sets require a +certain prolog when generating output, and by comparing this value with +zero, one can find out whether it is the first call and whether, +therefore, the prolog should be emitted. The conversion function must +not modify this element except to increment it just before returning, +as described below. + +@item int __internal_use +This element is another one rarely used but needed in certain +situations. It is assigned a nonzero value in case the conversion +functions are used to implement @code{mbsrtowcs} et al.@: (i.e., the +function is not used directly through the @code{iconv} interface). + +This sometimes makes a difference as it is expected that the +@code{iconv} functions are used to translate entire texts while the +@code{mbsrtowcs} functions are normally used only to convert single +strings and might be used multiple times to convert entire texts. + +But in this situation we would have a problem complying with some rules of +the character set specification. Some character sets require a prolog, +which must appear exactly once for an entire text. If a number of +@code{mbsrtowcs} calls are used to convert the text, only the first call +must add the prolog.
However, because there is no communication between the +different calls of @code{mbsrtowcs}, the conversion functions have no +possibility to find this out. The situation is different for sequences +of @code{iconv} calls since the handle allows access to the needed +information. + +The @code{int __internal_use} element is mostly used together with +@code{__invocation_counter} as follows: + +@smallexample +if (!data->__internal_use + && data->__invocation_counter == 0) + /* @r{Emit prolog.} */ + @dots{} +@end smallexample + +This element must never be modified. + +@item mbstate_t *__statep +The @code{__statep} element points to an object of type @code{mbstate_t} +(@pxref{Keeping the state}). The conversion of a stateful character +set must use the object pointed to by @code{__statep} to store +information about the conversion state. The @code{__statep} element +itself must never be modified. + +@item mbstate_t __state +This element must @emph{never} be used directly. It is only part of +this structure to have the needed space allocated. +@end table +@end deftp + +@subsubsection @code{iconv} module interfaces + +With the knowledge about the data structures we now can describe the +conversion function itself. To understand the interface a bit of +knowledge is necessary about the functionality in the C library that +loads the objects with the conversions. + +It is often the case that one conversion is used more than once (i.e., +there are several @code{iconv_open} calls for the same set of character +sets during one program run). The @code{mbsrtowcs} et.al.@: functions in +@theglibc{} also use the @code{iconv} functionality, which +increases the number of uses of the same functions even more. + +Because of this multiple use of conversions, the modules do not get +loaded exclusively for one conversion. Instead a module once loaded can +be used by an arbitrary number of @code{iconv} or @code{mbsrtowcs} calls +at the same time. 
The splitting of the information between conversion- +function-specific information and conversion data makes this possible. +The last section showed the two data structures used to do this. + +This is of course also reflected in the interface and semantics of the +functions that the modules must provide. There are three functions that +must have the following names: + +@table @code +@item gconv_init +The @code{gconv_init} function initializes the conversion function +specific data structure. This very same object is shared by all +conversions that use this conversion and, therefore, no state information +about the conversion itself must be stored in here. If a module +implements more than one conversion, the @code{gconv_init} function will +be called multiple times. + +@item gconv_end +The @code{gconv_end} function is responsible for freeing all resources +allocated by the @code{gconv_init} function. If there is nothing to do, +this function can be missing. Special care must be taken if the module +implements more than one conversion and the @code{gconv_init} function +does not allocate the same resources for all conversions. + +@item gconv +This is the actual conversion function. It is called to convert one +block of text. It gets passed the conversion step information +initialized by @code{gconv_init} and the conversion data, specific to +this use of the conversion functions. +@end table + +There are three data types defined for the three module interface +functions and these define the interface. + +@comment gconv.h +@comment GNU +@deftypevr {Data type} int {(*__gconv_init_fct)} (struct __gconv_step *) +This specifies the interface of the initialization function of the +module. It is called exactly once for each conversion the module +implements. + +As explained in the description of the @code{struct __gconv_step} data +structure above the initialization function has to initialize parts of +it. 
+ +@table @code +@item __min_needed_from +@itemx __max_needed_from +@itemx __min_needed_to +@itemx __max_needed_to +These elements must be initialized to the exact numbers of the minimum +and maximum number of bytes used by one character in the source and +destination character sets, respectively. If the characters all have the +same size, the minimum and maximum values are the same. + +@item __stateful +This element must be initialized to a nonzero value if the source +character set is stateful. Otherwise it must be zero. +@end table + +If the initialization function needs to communicate some information +to the conversion function, this communication can happen using the +@code{__data} element of the @code{__gconv_step} structure. But since +this data is shared by all the conversions, it must not be modified by +the conversion function. The example below shows how this can be used. + +@smallexample +#define MIN_NEEDED_FROM 1 +#define MAX_NEEDED_FROM 4 +#define MIN_NEEDED_TO 4 +#define MAX_NEEDED_TO 4 + +int +gconv_init (struct __gconv_step *step) +@{ + /* @r{Determine which direction.} */ + struct iso2022jp_data *new_data; + enum direction dir = illegal_dir; + enum variant var = illegal_var; + int result; + + if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0) + @{ + dir = from_iso2022jp; + var = iso2022jp; + @} + else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0) + @{ + dir = to_iso2022jp; + var = iso2022jp; + @} + else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0) + @{ + dir = from_iso2022jp; + var = iso2022jp2; + @} + else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0) + @{ + dir = to_iso2022jp; + var = iso2022jp2; + @} + + result = __GCONV_NOCONV; + if (dir != illegal_dir) + @{ + new_data = (struct iso2022jp_data *) + malloc (sizeof (struct iso2022jp_data)); + + result = __GCONV_NOMEM; + if (new_data != NULL) + @{ + new_data->dir = dir; + new_data->var = var; + step->__data = new_data; + + if (dir == 
from_iso2022jp) + @{ + step->__min_needed_from = MIN_NEEDED_FROM; + step->__max_needed_from = MAX_NEEDED_FROM; + step->__min_needed_to = MIN_NEEDED_TO; + step->__max_needed_to = MAX_NEEDED_TO; + @} + else + @{ + step->__min_needed_from = MIN_NEEDED_TO; + step->__max_needed_from = MAX_NEEDED_TO; + step->__min_needed_to = MIN_NEEDED_FROM; + step->__max_needed_to = MAX_NEEDED_FROM + 2; + @} + + /* @r{Yes, this is a stateful encoding.} */ + step->__stateful = 1; + + result = __GCONV_OK; + @} + @} + + return result; +@} +@end smallexample + +The function first checks which conversion is wanted. The module from +which this function is taken implements four different conversions; +which one is selected can be determined by comparing the names. The +comparison should always be done without paying attention to the case. + +Next, a data structure, which contains the necessary information about +which conversion is selected, is allocated. The data structure +@code{struct iso2022jp_data} is locally defined since, outside the +module, this data is not used at all. Please note that if all four +conversions this module supports are requested there are four data +blocks. + +One interesting thing is the initialization of the @code{__min_} and +@code{__max_} elements of the step data object. A single ISO-2022-JP +character can consist of one to four bytes. Therefore the +@code{MIN_NEEDED_FROM} and @code{MAX_NEEDED_FROM} macros are defined +this way. The output is always the @code{INTERNAL} character set (aka +UCS-4) and therefore each character consists of exactly four bytes. For +the conversion from @code{INTERNAL} to ISO-2022-JP we have to take into +account that escape sequences might be necessary to switch the character +sets. Therefore the @code{__max_needed_to} element for this direction +gets assigned @code{MAX_NEEDED_FROM + 2}. This takes into account the +two bytes needed for the escape sequences to signal the switching. 
The +asymmetry in the maximum values for the two directions can be explained +easily: when reading ISO-2022-JP text, escape sequences can be handled +alone (i.e., it is not necessary to process a real character since the +effect of the escape sequence can be recorded in the state information). +The situation is different for the other direction. Since it is in +general not known which character comes next, one cannot emit escape +sequences to change the state in advance. This means the escape +sequences have to be emitted together with the next character. +Therefore one needs more room than only for the character itself. + +The possible return values of the initialization function are: + +@table @code +@item __GCONV_OK +The initialization succeeded. +@item __GCONV_NOCONV +The requested conversion is not supported in the module. This can +happen if the @file{gconv-modules} file has errors. +@item __GCONV_NOMEM +Memory required to store additional information could not be allocated. +@end table +@end deftypevr + +The function called before the module is unloaded is significantly +simpler. It often has nothing at all to do, in which case it can be left +out completely. + +@comment gconv.h +@comment GNU +@deftypevr {Data type} void {(*__gconv_end_fct)} (struct __gconv_step *) +The task of this function is to free all resources allocated in the +initialization function. Therefore only the @code{__data} element of +the object pointed to by the argument is of interest. Continuing the +example from the initialization function, the finalization function +looks like this: + +@smallexample +void +gconv_end (struct __gconv_step *data) +@{ + free (data->__data); +@} +@end smallexample +@end deftypevr + +The most important function is the conversion function itself, which can +get quite complicated for complex character sets. But since this is not +of interest here, we will only describe a possible skeleton for the +conversion function.
+ +@comment gconv.h +@comment GNU +@deftypevr {Data type} int {(*__gconv_fct)} (struct __gconv_step *, struct __gconv_step_data *, const char **, const char *, size_t *, int) +The conversion function can be called for two basic reasons: to convert +text or to reset the state. From the description of the @code{iconv} +function it can be seen why the flushing mode is necessary. What mode +is selected is determined by the sixth argument, an integer. This +argument being nonzero means that flushing is selected. + +Common to both modes is where the output buffer can be found. The +information about this buffer is stored in the conversion step data. A +pointer to this information is passed as the second argument to this +function. The description of the @code{struct __gconv_step_data} +structure has more information on the conversion step data. + +@cindex stateful +What has to be done for flushing depends on the source character set. +If the source character set is not stateful, nothing has to be done. +Otherwise the function has to emit a byte sequence to bring the state +object into the initial state. Once all this has happened the other +conversion modules in the chain of conversions have to get the same +chance. Whether another step follows can be determined from the +@code{__is_last} element of the step data structure to which the second +parameter points. + +The more interesting mode is when actual text has to be converted. The +first step in this case is to convert as much text as possible from the +input buffer and store the result in the output buffer. The start of the +input buffer is determined by the third argument, which is a pointer to a +pointer variable referencing the beginning of the buffer. The fourth +argument is a pointer to the byte right after the last byte in the buffer. + +The conversion has to be performed according to the current state if the +character set is stateful.
The state is stored in an object pointed to +by the @code{__statep} element of the step data (second argument). Once +either the input buffer is empty or the output buffer is full the +conversion stops. At this point, the pointer variable referenced by the +third parameter must point to the byte following the last processed +byte (i.e., if all of the input is consumed, this pointer and the fourth +parameter have the same value). + +What now happens depends on whether this step is the last one. If it is +the last step, the only thing that has to be done is to update the +@code{__outbuf} element of the step data structure to point after the +last written byte. This update gives the caller the information on how +much text is available in the output buffer. In addition, the variable +pointed to by the fifth parameter, which is of type @code{size_t}, must +be incremented by the number of characters (@emph{not bytes}) that were +converted in a non-reversible way. Then, the function can return. + +In case the step is not the last one, the later conversion functions have +to get a chance to do their work. Therefore, the appropriate conversion +function has to be called. The information about the functions is +stored in the conversion data structures, passed as the first parameter. +This information and the step data are stored in arrays, so the next +element in both cases can be found by simple pointer arithmetic: + +@smallexample +int +gconv (struct __gconv_step *step, struct __gconv_step_data *data, + const char **inbuf, const char *inbufend, size_t *written, + int do_flush) +@{ + struct __gconv_step *next_step = step + 1; + struct __gconv_step_data *next_data = data + 1; + @dots{} +@end smallexample + +The @code{next_step} pointer references the next step information and +@code{next_data} the next data record. 
The call of the next function +therefore will look similar to this: + +@smallexample + next_step->__fct (next_step, next_data, &outerr, outbuf, + written, 0) +@end smallexample + +But this is not yet all. Once the function call returns the conversion +function might have some more to do. If the return value of the function +is @code{__GCONV_EMPTY_INPUT}, more room is available in the output +buffer. Unless the input buffer is empty, the conversion functions start +all over again and process the rest of the input buffer. If the return +value is not @code{__GCONV_EMPTY_INPUT}, something went wrong and we have +to recover from this. + +A requirement for the conversion function is that the input buffer +pointer (the third argument) always point to the last character that +was put in converted form into the output buffer. This is trivially +true after the conversion performed in the current step, but if the +conversion functions deeper downstream stop prematurely, not all +characters from the output buffer are consumed and, therefore, the input +buffer pointers must be backed off to the right position. + +Correcting the input buffers is easy to do if the input and output +character sets have a fixed width for all characters. In this situation +we can compute how many characters are left in the output buffer and, +therefore, can correct the input buffer pointer appropriately with a +similar computation. Things are getting tricky if either character set +has characters represented with variable length byte sequences, and it +gets even more complicated if the conversion has to take care of the +state. In these cases the conversion has to be performed once again, from +the known state before the initial conversion (i.e., if necessary the +state of the conversion has to be reset and the conversion loop has to be +executed again). The difference now is that it is known how much input +must be created, and the conversion can stop before converting the first +unused character. 
Once this is done the input buffer pointers must be +updated again and the function can return. + +One final thing should be mentioned. If it is necessary for the +conversion to know whether it is the first invocation (in case a prolog +has to be emitted), the conversion function should increment the +@code{__invocation_counter} element of the step data structure just +before returning to the caller. See the description of the @code{struct +__gconv_step_data} structure above for more information on how this can +be used. + +The return value must be one of the following values: + +@table @code +@item __GCONV_EMPTY_INPUT +All input was consumed and there is room left in the output buffer. +@item __GCONV_FULL_OUTPUT +No more room in the output buffer. In case this is not the last step +this value is propagated down from the call of the next conversion +function in the chain. +@item __GCONV_INCOMPLETE_INPUT +The input buffer is not entirely empty since it contains an incomplete +character sequence. +@end table + +The following example provides a framework for a conversion function. +In case a new conversion has to be written the holes in this +implementation have to be filled and that is it. + +@smallexample +int +gconv (struct __gconv_step *step, struct __gconv_step_data *data, + const char **inbuf, const char *inbufend, size_t *written, + int do_flush) +@{ + struct __gconv_step *next_step = step + 1; + struct __gconv_step_data *next_data = data + 1; + gconv_fct fct = next_step->__fct; + int status; + + /* @r{If the function is called with no input this means we have} + @r{to reset to the initial state. The possibly partly} + @r{converted input is dropped.} */ + if (do_flush) + @{ + status = __GCONV_OK; + + /* @r{Possibly emit a byte sequence which puts the state object} + @r{into the initial state.} */ + + /* @r{Call the steps down the chain if there are any but only} + @r{if we successfully emitted the escape sequence.} */ + if (status == __GCONV_OK && !
data->__is_last) + status = fct (next_step, next_data, NULL, NULL, + written, 1); + @} + else + @{ + /* @r{We preserve the initial values of the pointer variables.} */ + const char *inptr = *inbuf; + char *outbuf = data->__outbuf; + char *outend = data->__outbufend; + char *outptr; + + do + @{ + /* @r{Remember the start value for this round.} */ + inptr = *inbuf; + /* @r{The outbuf buffer is empty.} */ + outptr = outbuf; + + /* @r{For stateful encodings the state must be safe here.} */ + + /* @r{Run the conversion loop. @code{status} is set} + @r{appropriately afterwards.} */ + + /* @r{If this is the last step, leave the loop. There is} + @r{nothing we can do.} */ + if (data->__is_last) + @{ + /* @r{Store information about how many bytes are} + @r{available.} */ + data->__outbuf = outbuf; + + /* @r{If any non-reversible conversions were performed,} + @r{add the number to @code{*written}.} */ + + break; + @} + + /* @r{Write out all output that was produced.} */ + if (outbuf > outptr) + @{ + const char *outerr = data->__outbuf; + int result; + + result = fct (next_step, next_data, &outerr, + outbuf, written, 0); + + if (result != __GCONV_EMPTY_INPUT) + @{ + if (outerr != outbuf) + @{ + /* @r{Reset the input buffer pointer. We} + @r{document here the complex case.} */ + size_t nstatus; + + /* @r{Reload the pointers.} */ + *inbuf = inptr; + outbuf = outptr; + + /* @r{Possibly reset the state.} */ + + /* @r{Redo the conversion, but this time} + @r{the end of the output buffer is at} + @r{@code{outerr}.} */ + @} + + /* @r{Change the status.} */ + status = result; + @} + else + /* @r{All the output is consumed, we can make} + @r{ another run if everything was ok.} */ + if (status == __GCONV_FULL_OUTPUT) + status = __GCONV_OK; + @} + @} + while (status == __GCONV_OK); + + /* @r{We finished one use of this step.} */ + ++data->__invocation_counter; + @} + + return status; +@} +@end smallexample +@end deftypevr + +This information should be sufficient to write new modules. 
Anybody +doing so should also take a look at the available source code in the +@glibcadj{} sources. It contains many examples of working and optimized +modules. + +@c File charset.texi edited October 2001 by Dennis Grace, IBM Corporation diff --git a/REORG.TODO/manual/check-safety.sh b/REORG.TODO/manual/check-safety.sh new file mode 100644 index 0000000000..c0d93b0ba1 --- /dev/null +++ b/REORG.TODO/manual/check-safety.sh @@ -0,0 +1,124 @@ +#!/bin/sh + +# Copyright 2014-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + + +# Check that the @safety notes are self-consistent, i.e., that they're +# in proper order (mt then as then ac), that remarks appear within +# corresponding sections (mt within mt, etc), that unsafety always has +# an explicit reason and when there's a reason for unsafety it's not +# safe, and that there aren't duplicates remarks. + + +success=: + +# If no arguments are given, take all *.texi files in the current directory. +test $# != 0 || set *.texi + +# FIXME: check that each @deftypefu?n is followed by a @safety note, +# with nothing but @deftypefu?nx and comment lines in between. (There +# might be more stuff too). 
+ + +# Check that all safety remarks have entries for all of MT, AS and AC, +# in this order, with an optional prelim note before them. +grep -n '^@safety' "$@" | +grep -v ':@safety{\(@prelim{}\)\?@mt\(un\)\?safe{.*}'\ +'@as\(un\)\?safe{.*}@ac\(un\)\?safe{.*}}' && +success=false + +# Check that @mt-started notes appear within @mtsafe or @mtunsafe, +# that @as-started notes appear within @assafe or @asunsafe, and that +# @ac-started notes appear within @acsafe or @acunsafe. Also check +# that @mt, @as and @ac are followed by an s (for safe) or u (for +# unsafe), but let @mt have as, ac or asc before [su], and let @as +# have a c (for cancel) before [su]. Also make sure blanks separate +# each of the annotations. +grep -n '^@safety' "$@" | +grep -v ':@safety{\(@prelim{}\)\?'\ +'@mt\(un\)\?safe{\(@mt\(asc\?\|ac\)\?[su][^ ]*}\)\?'\ +'\( @mt\(asc\?\|ac\)\?[su][^ ]*}\)*}'\ +'@as\(un\)\?safe{\(@asc\?[su][^ ]*}\)\?'\ +'\( @asc\?[su][^ ]*}\)*}'\ +'@ac\(un\)\?safe{\(@ac[su][^ ]*}\)\?'\ +'\( @ac[su][^ ]*}\)*}}' && +success=false + +# Make sure safety lines marked as @mtsafe do not contain any +# MT-Unsafe remark; that would be @mtu, but there could be as, ac or +# asc between mt and u. +grep -n '^@safety.*@mtsafe' "$@" | +grep '@mt\(asc\?\|ac\)?u' "$@" && +success=false + +# Make sure @mtunsafe lines contain at least one @mtu remark (with +# optional as, ac or asc between mt and u). +grep -n '^@safety.*@mtunsafe' "$@" | +grep -v '@mtunsafe{.*@mt\(asc\?\|ac\)\?u' && +success=false + +# Make sure safety lines marked as @assafe do not contain any AS-Unsafe +# remark, which could be @asu or @mtasu note (with an optional c +# between as and u in both cases). +grep -n '^@safety.*@assafe' "$@" | +grep '@\(mt\)\?asc\?u' && +success=false + +# Make sure @asunsafe lines contain at least one @asu remark (which +# could be @ascu, or @mtasu or even @mtascu). 
+grep -n '^@safety.*@asunsafe' "$@" | +grep -v '@mtasc\?u.*@asunsafe\|@asunsafe{.*@asc\?u' && +success=false + +# Make sure safety lines marked as @acsafe do not contain any +# AC-Unsafe remark, which could be @acu, @ascu or even @mtacu or +# @mtascu. +grep -n '^@safety.*@acsafe' "$@" | +grep '@\(mt\)\?as\?cu' && +success=false + +# Make sure @acunsafe lines contain at least one @acu remark (possibly +# implied by @ascu, @mtacu or @mtascu). +grep -n '^@safety.*@acunsafe' "$@" | +grep -v '@\(mtas\?\|as\)cu.*@acunsafe\|@acunsafe{.*@acu' && +success=false + +# Make sure there aren't duplicate remarks in the same safety note. +# The \1 back-reference matches a second occurrence of the first +# captured remark later on the same line. +grep -n '^@safety' "$@" | +grep '[^:]\(@\(mt\|a[sc]\)[^ {]*{[^ ]*}\).*[^:]\1' && +success=false + +# Check that comments containing safety remarks do not contain {}s, +# that all @mt remarks appear before @as remarks, that in turn appear +# before @ac remarks, all properly blank-separated, and that an +# optional comment about exclusions is between []s at the end of the +# line. +grep -n '^@c \+[^@ ]\+\( dup\)\?'\ +'\( @\(mt\|a[sc]\)[^ ]*\)*\( \[.*\]\)\?$' "$@" | +grep -v ':@c *[^@{}]*\( @mt[^ {}]*\)*'\ +'\( @as[^ {}]*\)*\( @ac[^ {}]*\)*\( \[.*\]\)\?$' && +success=false + +# Check that comments containing safety remarks do not contain +# duplicate remarks. 
+grep -n '^@c \+[^@ ]\+\( dup\)\?'\ +'\( @\(mt\|a[sc]\)[^ ]*\)*\( \[.*\]\)\?$' "$@" | +grep '[^:]\(@\(mt\|a[sc]\)[^ ]*\) \(.*[^:]\)\?\1\($\| \)' && +success=false + +$success diff --git a/REORG.TODO/manual/conf.texi b/REORG.TODO/manual/conf.texi new file mode 100644 index 0000000000..6700e86539 --- /dev/null +++ b/REORG.TODO/manual/conf.texi @@ -0,0 +1,1755 @@ +@node System Configuration, Cryptographic Functions, System Management, Top +@c %MENU% Parameters describing operating system limits +@chapter System Configuration Parameters + +The functions and macros listed in this chapter give information about +configuration parameters of the operating system---for example, capacity +limits, presence of optional POSIX features, and the default path for +executable files (@pxref{String Parameters}). + +@menu +* General Limits:: Constants and functions that describe + various process-related limits that have + one uniform value for any given machine. +* System Options:: Optional POSIX features. +* Version Supported:: Version numbers of POSIX.1 and POSIX.2. +* Sysconf:: Getting specific configuration values + of general limits and system options. +* Minimums:: Minimum values for general limits. + +* Limits for Files:: Size limitations that pertain to individual files. + These can vary between file systems + or even from file to file. +* Options for Files:: Optional features that some files may support. +* File Minimums:: Minimum values for file limits. +* Pathconf:: Getting the limit values for a particular file. + +* Utility Limits:: Capacity limits of some POSIX.2 utility programs. +* Utility Minimums:: Minimum allowable values of those limits. + +* String Parameters:: Getting the default search path. +@end menu + +@node General Limits +@section General Capacity Limits +@cindex POSIX capacity limits +@cindex limits, POSIX +@cindex capacity limits, POSIX + +The POSIX.1 and POSIX.2 standards specify a number of parameters that +describe capacity limitations of the system. 
These limits can be fixed +constants for a given operating system, or they can vary from machine to +machine. For example, some limit values may be configurable by the +system administrator, either at run time or by rebuilding the kernel, +and this should not require recompiling application programs. + +@pindex limits.h +Each of the following limit parameters has a macro that is defined in +@file{limits.h} only if the system has a fixed, uniform limit for the +parameter in question. If the system allows different file systems or +files to have different limits, then the macro is undefined; use +@code{sysconf} to find out the limit that applies at a particular time +on a particular machine. @xref{Sysconf}. + +Each of these parameters also has another macro, with a name starting +with @samp{_POSIX}, which gives the lowest value that the limit is +allowed to have on @emph{any} POSIX system. @xref{Minimums}. + +@cindex limits, program argument size +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int ARG_MAX +If defined, the unvarying maximum combined length of the @var{argv} and +@var{environ} arguments that can be passed to the @code{exec} functions. +@end deftypevr + +@cindex limits, number of processes +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int CHILD_MAX +If defined, the unvarying maximum number of processes that can exist +with the same real user ID at any one time. In BSD and GNU, this is +controlled by the @code{RLIMIT_NPROC} resource limit; @pxref{Limits on +Resources}. +@end deftypevr + +@cindex limits, number of open files +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int OPEN_MAX +If defined, the unvarying maximum number of files that a single process +can have open simultaneously. In BSD and GNU, this is controlled +by the @code{RLIMIT_NOFILE} resource limit; @pxref{Limits on Resources}. 
+@end deftypevr + +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int STREAM_MAX +If defined, the unvarying maximum number of streams that a single +process can have open simultaneously. @xref{Opening Streams}. +@end deftypevr + +@cindex limits, time zone name length +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int TZNAME_MAX +If defined, the unvarying maximum length of a time zone name. +@xref{Time Zone Functions}. +@end deftypevr + +These limit macros are always defined in @file{limits.h}. + +@cindex limits, number of supplementary group IDs +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int NGROUPS_MAX +The maximum number of supplementary group IDs that one process can have. + +The value of this macro is actually a lower bound for the maximum. That +is, you can count on being able to have that many supplementary group +IDs, but a particular machine might let you have even more. You can use +@code{sysconf} to see whether a particular machine will let you have +more (@pxref{Sysconf}). +@end deftypevr + +@comment limits.h +@comment POSIX.1 +@deftypevr Macro ssize_t SSIZE_MAX +The largest value that can fit in an object of type @code{ssize_t}. +Effectively, this is the limit on the number of bytes that can be read +or written in a single operation. + +This macro is defined in all POSIX systems because this limit is never +configurable. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int RE_DUP_MAX +The largest number of repetitions you are guaranteed is allowed in the +construct @samp{\@{@var{min},@var{max}\@}} in a regular expression. + +The value of this macro is actually a lower bound for the maximum. That +is, you can count on being able to have that many repetitions, but a +particular machine might let you have even more. You can use +@code{sysconf} to see whether a particular machine will let you have +more (@pxref{Sysconf}). 
And even the value that @code{sysconf} tells +you is just a lower bound---larger values might work. + +This macro is defined in all POSIX.2 systems, because POSIX.2 says it +should always be defined even if there is no specific imposed limit. +@end deftypevr + +@node System Options +@section Overall System Options +@cindex POSIX optional features +@cindex optional POSIX features + +POSIX defines certain system-specific options that not all POSIX systems +support. Since these options are provided in the kernel, not in the +library, simply using @theglibc{} does not guarantee any of these +features are supported; it depends on the system you are using. + +@pindex unistd.h +You can test for the availability of a given option using the macros in +this section, together with the function @code{sysconf}. The macros are +defined only if you include @file{unistd.h}. + +For the following macros, if the macro is defined in @file{unistd.h}, +then the option is supported. Otherwise, the option may or may not be +supported; use @code{sysconf} to find out. @xref{Sysconf}. + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int _POSIX_JOB_CONTROL +If this symbol is defined, it indicates that the system supports job +control. Otherwise, the implementation behaves as if all processes +within a session belong to a single process group. @xref{Job Control}. +@end deftypevr + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int _POSIX_SAVED_IDS +If this symbol is defined, it indicates that the system remembers the +effective user and group IDs of a process before it executes an +executable file with the set-user-ID or set-group-ID bits set, and that +explicitly changing the effective user or group IDs back to these values +is permitted. If this option is not defined, then if a nonprivileged +process changes its effective user or group ID to the real user or group +ID of the process, it can't change it back again. @xref{Enable/Disable +Setuid}. 
+@end deftypevr + +For the following macros, if the macro is defined in @file{unistd.h}, +then its value indicates whether the option is supported. A value of +@code{-1} means no, and any other value means yes. If the macro is not +defined, then the option may or may not be supported; use @code{sysconf} +to find out. @xref{Sysconf}. + +@comment unistd.h +@comment POSIX.2 +@deftypevr Macro int _POSIX2_C_DEV +If this symbol is defined, it indicates that the system has the POSIX.2 +C compiler command, @code{c89}. @Theglibc{} always defines this +as @code{1}, on the assumption that you would not have installed it if +you didn't have a C compiler. +@end deftypevr + +@comment unistd.h +@comment POSIX.2 +@deftypevr Macro int _POSIX2_FORT_DEV +If this symbol is defined, it indicates that the system has the POSIX.2 +Fortran compiler command, @code{fort77}. @Theglibc{} never +defines this, because we don't know what the system has. +@end deftypevr + +@comment unistd.h +@comment POSIX.2 +@deftypevr Macro int _POSIX2_FORT_RUN +If this symbol is defined, it indicates that the system has the POSIX.2 +@code{asa} command to interpret Fortran carriage control. @Theglibc{} +never defines this, because we don't know what the system has. +@end deftypevr + +@comment unistd.h +@comment POSIX.2 +@deftypevr Macro int _POSIX2_LOCALEDEF +If this symbol is defined, it indicates that the system has the POSIX.2 +@code{localedef} command. @Theglibc{} never defines this, because +we don't know what the system has. +@end deftypevr + +@comment unistd.h +@comment POSIX.2 +@deftypevr Macro int _POSIX2_SW_DEV +If this symbol is defined, it indicates that the system has the POSIX.2 +commands @code{ar}, @code{make}, and @code{strip}. @Theglibc{} +always defines this as @code{1}, on the assumption that you had to have +@code{ar} and @code{make} to install the library, and it's unlikely that +@code{strip} would be absent when those are present. 
+@end deftypevr + +@node Version Supported +@section Which Version of POSIX is Supported + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro {long int} _POSIX_VERSION +This constant represents the version of the POSIX.1 standard to which +the implementation conforms. For an implementation conforming to the +1995 POSIX.1 standard, the value is the integer @code{199506L}. + +@code{_POSIX_VERSION} is always defined (in @file{unistd.h}) in any +POSIX system. + +@strong{Usage Note:} Don't try to test whether the system supports POSIX +by including @file{unistd.h} and then checking whether +@code{_POSIX_VERSION} is defined. On a non-POSIX system, this will +probably fail because there is no @file{unistd.h}. We do not know of +@emph{any} way you can reliably test at compilation time whether your +target system supports POSIX or whether @file{unistd.h} exists. +@end deftypevr + +@comment unistd.h +@comment POSIX.2 +@deftypevr Macro {long int} _POSIX2_C_VERSION +This constant represents the version of the POSIX.2 standard which the +library and system kernel support. We don't know what value this will +be for the first version of the POSIX.2 standard, because the value is +based on the year and month in which the standard is officially adopted. + +The value of this symbol says nothing about the utilities installed on +the system. + +@strong{Usage Note:} You can use this macro to tell whether a POSIX.1 +system library supports POSIX.2 as well. Any POSIX.1 system contains +@file{unistd.h}, so include that file and then test @code{defined +(_POSIX2_C_VERSION)}. +@end deftypevr + +@node Sysconf +@section Using @code{sysconf} + +When your system has configurable system limits, you can use the +@code{sysconf} function to find out the value that applies to any +particular machine. The function and the associated @var{parameter} +constants are declared in the header file @file{unistd.h}. + +@menu +* Sysconf Definition:: Detailed specifications of @code{sysconf}. 
+* Constants for Sysconf:: The list of parameters @code{sysconf} can read. +* Examples of Sysconf:: How to use @code{sysconf} and the parameter + macros properly together. +@end menu + +@node Sysconf Definition +@subsection Definition of @code{sysconf} + +@comment unistd.h +@comment POSIX.1 +@deftypefun {long int} sysconf (int @var{parameter}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c Some parts of the implementation open /proc and /sys files and dirs +@c to collect system details, using fd and stream I/O depending on the +@c case. The returned max value may change over time for NPROCS, +@c NPROCS_CONF, PHYS_PAGES, AVPHYS_PAGES, NGROUPS_MAX, SIGQUEUE_MAX, +@c depending on variable values read from /proc at each call, and from +@c rlimit-obtained values CHILD_MAX, OPEN_MAX, ARG_MAX, SIGQUEUE_MAX. +This function is used to inquire about runtime system parameters. The +@var{parameter} argument should be one of the @samp{_SC_} symbols listed +below. + +The normal return value from @code{sysconf} is the value you requested. +A value of @code{-1} is returned both if the implementation does not +impose a limit, and in case of an error. + +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EINVAL +The value of the @var{parameter} is invalid. +@end table +@end deftypefun + +@node Constants for Sysconf +@subsection Constants for @code{sysconf} Parameters + +Here are the symbolic constants for use as the @var{parameter} argument +to @code{sysconf}. The values are all integer constants (more +specifically, enumeration type values). + +@vtable @code +@comment unistd.h +@comment POSIX.1 +@item _SC_ARG_MAX +Inquire about the parameter corresponding to @code{ARG_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_CHILD_MAX +Inquire about the parameter corresponding to @code{CHILD_MAX}. 
+ +@comment unistd.h +@comment POSIX.1 +@item _SC_OPEN_MAX +Inquire about the parameter corresponding to @code{OPEN_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_STREAM_MAX +Inquire about the parameter corresponding to @code{STREAM_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_TZNAME_MAX +Inquire about the parameter corresponding to @code{TZNAME_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_NGROUPS_MAX +Inquire about the parameter corresponding to @code{NGROUPS_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_JOB_CONTROL +Inquire about the parameter corresponding to @code{_POSIX_JOB_CONTROL}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SAVED_IDS +Inquire about the parameter corresponding to @code{_POSIX_SAVED_IDS}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_VERSION +Inquire about the parameter corresponding to @code{_POSIX_VERSION}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_CLK_TCK +Inquire about the number of clock ticks per second; @pxref{CPU Time}. +The corresponding parameter @code{CLK_TCK} is obsolete. + +@comment unistd.h +@comment GNU +@item _SC_CHARCLASS_NAME_MAX +Inquire about the parameter corresponding to maximal length allowed for +a character class name in an extended locale specification. These +extensions are not yet standardized and so this option is not standardized +either. + +@comment unistd.h +@comment POSIX.1 +@item _SC_REALTIME_SIGNALS +Inquire about the parameter corresponding to @code{_POSIX_REALTIME_SIGNALS}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_PRIORITY_SCHEDULING +Inquire about the parameter corresponding to @code{_POSIX_PRIORITY_SCHEDULING}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_TIMERS +Inquire about the parameter corresponding to @code{_POSIX_TIMERS}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_ASYNCHRONOUS_IO +Inquire about the parameter corresponding to @code{_POSIX_ASYNCHRONOUS_IO}. 
+ +@comment unistd.h +@comment POSIX.1 +@item _SC_PRIORITIZED_IO +Inquire about the parameter corresponding to @code{_POSIX_PRIORITIZED_IO}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SYNCHRONIZED_IO +Inquire about the parameter corresponding to @code{_POSIX_SYNCHRONIZED_IO}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_FSYNC +Inquire about the parameter corresponding to @code{_POSIX_FSYNC}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MAPPED_FILES +Inquire about the parameter corresponding to @code{_POSIX_MAPPED_FILES}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MEMLOCK +Inquire about the parameter corresponding to @code{_POSIX_MEMLOCK}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MEMLOCK_RANGE +Inquire about the parameter corresponding to @code{_POSIX_MEMLOCK_RANGE}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MEMORY_PROTECTION +Inquire about the parameter corresponding to @code{_POSIX_MEMORY_PROTECTION}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MESSAGE_PASSING +Inquire about the parameter corresponding to @code{_POSIX_MESSAGE_PASSING}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SEMAPHORES +Inquire about the parameter corresponding to @code{_POSIX_SEMAPHORES}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SHARED_MEMORY_OBJECTS +Inquire about the parameter corresponding to@* +@code{_POSIX_SHARED_MEMORY_OBJECTS}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_AIO_LISTIO_MAX +Inquire about the parameter corresponding to @code{_POSIX_AIO_LISTIO_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_AIO_MAX +Inquire about the parameter corresponding to @code{_POSIX_AIO_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_AIO_PRIO_DELTA_MAX +Inquire about the value by which a process can decrease its asynchronous I/O +priority level from its own scheduling priority. This corresponds to the +run-time invariant value @code{AIO_PRIO_DELTA_MAX}. 
+ +@comment unistd.h +@comment POSIX.1 +@item _SC_DELAYTIMER_MAX +Inquire about the parameter corresponding to @code{_POSIX_DELAYTIMER_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MQ_OPEN_MAX +Inquire about the parameter corresponding to @code{_POSIX_MQ_OPEN_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_MQ_PRIO_MAX +Inquire about the parameter corresponding to @code{_POSIX_MQ_PRIO_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_RTSIG_MAX +Inquire about the parameter corresponding to @code{_POSIX_RTSIG_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SEM_NSEMS_MAX +Inquire about the parameter corresponding to @code{_POSIX_SEM_NSEMS_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SEM_VALUE_MAX +Inquire about the parameter corresponding to @code{_POSIX_SEM_VALUE_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_SIGQUEUE_MAX +Inquire about the parameter corresponding to @code{_POSIX_SIGQUEUE_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_TIMER_MAX +Inquire about the parameter corresponding to @code{_POSIX_TIMER_MAX}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII +Inquire about the parameter corresponding to @code{_POSIX_PII}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_XTI +Inquire about the parameter corresponding to @code{_POSIX_PII_XTI}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_SOCKET +Inquire about the parameter corresponding to @code{_POSIX_PII_SOCKET}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_INTERNET +Inquire about the parameter corresponding to @code{_POSIX_PII_INTERNET}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_OSI +Inquire about the parameter corresponding to @code{_POSIX_PII_OSI}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_SELECT +Inquire about the parameter corresponding to @code{_POSIX_SELECT}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_UIO_MAXIOV +Inquire about the parameter corresponding to @code{_POSIX_UIO_MAXIOV}. 
+ +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_INTERNET_STREAM +Inquire about the parameter corresponding to @code{_POSIX_PII_INTERNET_STREAM}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_INTERNET_DGRAM +Inquire about the parameter corresponding to @code{_POSIX_PII_INTERNET_DGRAM}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_OSI_COTS +Inquire about the parameter corresponding to @code{_POSIX_PII_OSI_COTS}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_OSI_CLTS +Inquire about the parameter corresponding to @code{_POSIX_PII_OSI_CLTS}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_PII_OSI_M +Inquire about the parameter corresponding to @code{_POSIX_PII_OSI_M}. + +@comment unistd.h +@comment POSIX.1g +@item _SC_T_IOV_MAX +Inquire about the value associated with the @code{T_IOV_MAX} +variable. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREADS +Inquire about the parameter corresponding to @code{_POSIX_THREADS}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_SAFE_FUNCTIONS +Inquire about the parameter corresponding to@* +@code{_POSIX_THREAD_SAFE_FUNCTIONS}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_GETGR_R_SIZE_MAX +Inquire about the parameter corresponding to @code{_POSIX_GETGR_R_SIZE_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_GETPW_R_SIZE_MAX +Inquire about the parameter corresponding to @code{_POSIX_GETPW_R_SIZE_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_LOGIN_NAME_MAX +Inquire about the parameter corresponding to @code{_POSIX_LOGIN_NAME_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_TTY_NAME_MAX +Inquire about the parameter corresponding to @code{_POSIX_TTY_NAME_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_DESTRUCTOR_ITERATIONS +Inquire about the parameter corresponding to +@code{_POSIX_THREAD_DESTRUCTOR_ITERATIONS}. 
+ +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_KEYS_MAX +Inquire about the parameter corresponding to @code{_POSIX_THREAD_KEYS_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_STACK_MIN +Inquire about the parameter corresponding to @code{_POSIX_THREAD_STACK_MIN}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_THREADS_MAX +Inquire about the parameter corresponding to @code{_POSIX_THREAD_THREADS_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_ATTR_STACKADDR +Inquire about the parameter corresponding to@* +@code{_POSIX_THREAD_ATTR_STACKADDR}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_ATTR_STACKSIZE +Inquire about the parameter corresponding to@* +@code{_POSIX_THREAD_ATTR_STACKSIZE}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_PRIORITY_SCHEDULING +Inquire about the parameter corresponding to +@code{_POSIX_THREAD_PRIORITY_SCHEDULING}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_PRIO_INHERIT +Inquire about the parameter corresponding to @code{_POSIX_THREAD_PRIO_INHERIT}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_PRIO_PROTECT +Inquire about the parameter corresponding to @code{_POSIX_THREAD_PRIO_PROTECT}. + +@comment unistd.h +@comment POSIX.1 +@item _SC_THREAD_PROCESS_SHARED +Inquire about the parameter corresponding to +@code{_POSIX_THREAD_PROCESS_SHARED}. + +@comment unistd.h +@comment POSIX.2 +@item _SC_2_C_DEV +Inquire about whether the system has the POSIX.2 C compiler command, +@code{c89}. + +@comment unistd.h +@comment POSIX.2 +@item _SC_2_FORT_DEV +Inquire about whether the system has the POSIX.2 Fortran compiler +command, @code{fort77}. + +@comment unistd.h +@comment POSIX.2 +@item _SC_2_FORT_RUN +Inquire about whether the system has the POSIX.2 @code{asa} command to +interpret Fortran carriage control. + +@comment unistd.h +@comment POSIX.2 +@item _SC_2_LOCALEDEF +Inquire about whether the system has the POSIX.2 @code{localedef} +command. 
+ +@comment unistd.h +@comment POSIX.2 +@item _SC_2_SW_DEV +Inquire about whether the system has the POSIX.2 commands @code{ar}, +@code{make}, and @code{strip}. + +@comment unistd.h +@comment POSIX.2 +@item _SC_BC_BASE_MAX +Inquire about the maximum value of @code{obase} in the @code{bc} +utility. + +@comment unistd.h +@comment POSIX.2 +@item _SC_BC_DIM_MAX +Inquire about the maximum size of an array in the @code{bc} +utility. + +@comment unistd.h +@comment POSIX.2 +@item _SC_BC_SCALE_MAX +Inquire about the maximum value of @code{scale} in the @code{bc} +utility. + +@comment unistd.h +@comment POSIX.2 +@item _SC_BC_STRING_MAX +Inquire about the maximum size of a string constant in the +@code{bc} utility. + +@comment unistd.h +@comment POSIX.2 +@item _SC_COLL_WEIGHTS_MAX +Inquire about the maximum number of weights that can necessarily +be used in defining the collating sequence for a locale. + +@comment unistd.h +@comment POSIX.2 +@item _SC_EXPR_NEST_MAX +Inquire about the maximum number of expressions nested within +parentheses when using the @code{expr} utility. + +@comment unistd.h +@comment POSIX.2 +@item _SC_LINE_MAX +Inquire about the maximum size of a text line that the POSIX.2 text +utilities can handle. + +@comment unistd.h +@comment POSIX.2 +@item _SC_EQUIV_CLASS_MAX +Inquire about the maximum number of weights that can be assigned to an +entry of the @code{LC_COLLATE} category @samp{order} keyword in a locale +definition. @Theglibc{} does not presently support locale +definitions. + +@comment unistd.h +@comment POSIX.2 +@item _SC_VERSION +Inquire about the version number of POSIX.1 that the library and kernel +support. + +@comment unistd.h +@comment POSIX.2 +@item _SC_2_VERSION +Inquire about the version number of POSIX.2 that the system utilities +support. + +@comment unistd.h +@comment GNU +@item _SC_PAGESIZE +Inquire about the virtual memory page size of the machine. +@code{getpagesize} returns the same value (@pxref{Query Memory Parameters}). 
+ +@comment unistd.h +@comment GNU +@item _SC_NPROCESSORS_CONF +Inquire about the number of configured processors. + +@comment unistd.h +@comment GNU +@item _SC_NPROCESSORS_ONLN +Inquire about the number of processors online. + +@comment unistd.h +@comment GNU +@item _SC_PHYS_PAGES +Inquire about the number of physical pages in the system. + +@comment unistd.h +@comment GNU +@item _SC_AVPHYS_PAGES +Inquire about the number of available physical pages in the system. + +@comment unistd.h +@comment GNU +@item _SC_ATEXIT_MAX +Inquire about the number of functions which can be registered as termination +functions for @code{atexit}; @pxref{Cleanups on Exit}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_VERSION +Inquire about the parameter corresponding to @code{_XOPEN_VERSION}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_XCU_VERSION +Inquire about the parameter corresponding to @code{_XOPEN_XCU_VERSION}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_UNIX +Inquire about the parameter corresponding to @code{_XOPEN_UNIX}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_REALTIME +Inquire about the parameter corresponding to @code{_XOPEN_REALTIME}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_REALTIME_THREADS +Inquire about the parameter corresponding to @code{_XOPEN_REALTIME_THREADS}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_LEGACY +Inquire about the parameter corresponding to @code{_XOPEN_LEGACY}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_CRYPT +Inquire about the parameter corresponding to @code{_XOPEN_CRYPT}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_ENH_I18N +Inquire about the parameter corresponding to @code{_XOPEN_ENH_I18N}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_SHM +Inquire about the parameter corresponding to @code{_XOPEN_SHM}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_XPG2 +Inquire about the parameter corresponding to @code{_XOPEN_XPG2}. 
+ +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_XPG3 +Inquire about the parameter corresponding to @code{_XOPEN_XPG3}. + +@comment unistd.h +@comment X/Open +@item _SC_XOPEN_XPG4 +Inquire about the parameter corresponding to @code{_XOPEN_XPG4}. + +@comment unistd.h +@comment X/Open +@item _SC_CHAR_BIT +Inquire about the number of bits in a variable of type @code{char}. + +@comment unistd.h +@comment X/Open +@item _SC_CHAR_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{char}. + +@comment unistd.h +@comment X/Open +@item _SC_CHAR_MIN +Inquire about the minimum value which can be stored in a variable of type +@code{char}. + +@comment unistd.h +@comment X/Open +@item _SC_INT_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{int}. + +@comment unistd.h +@comment X/Open +@item _SC_INT_MIN +Inquire about the minimum value which can be stored in a variable of type +@code{int}. + +@comment unistd.h +@comment X/Open +@item _SC_LONG_BIT +Inquire about the number of bits in a variable of type @code{long int}. + +@comment unistd.h +@comment X/Open +@item _SC_WORD_BIT +Inquire about the number of bits in a variable of a register word. + +@comment unistd.h +@comment X/Open +@item _SC_MB_LEN_MAX +Inquire about the maximum length of a multi-byte representation of a wide +character value. + +@comment unistd.h +@comment X/Open +@item _SC_NZERO +Inquire about the value used to internally represent the zero priority level for +the process execution. + +@comment unistd.h +@comment X/Open +@item _SC_SSIZE_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{ssize_t}. + +@comment unistd.h +@comment X/Open +@item _SC_SCHAR_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{signed char}. + +@comment unistd.h +@comment X/Open +@item _SC_SCHAR_MIN +Inquire about the minimum value which can be stored in a variable of type +@code{signed char}. 
+ +@comment unistd.h +@comment X/Open +@item _SC_SHRT_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{short int}. + +@comment unistd.h +@comment X/Open +@item _SC_SHRT_MIN +Inquire about the minimum value which can be stored in a variable of type +@code{short int}. + +@comment unistd.h +@comment X/Open +@item _SC_UCHAR_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{unsigned char}. + +@comment unistd.h +@comment X/Open +@item _SC_UINT_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{unsigned int}. + +@comment unistd.h +@comment X/Open +@item _SC_ULONG_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{unsigned long int}. + +@comment unistd.h +@comment X/Open +@item _SC_USHRT_MAX +Inquire about the maximum value which can be stored in a variable of type +@code{unsigned short int}. + +@comment unistd.h +@comment X/Open +@item _SC_NL_ARGMAX +Inquire about the parameter corresponding to @code{NL_ARGMAX}. + +@comment unistd.h +@comment X/Open +@item _SC_NL_LANGMAX +Inquire about the parameter corresponding to @code{NL_LANGMAX}. + +@comment unistd.h +@comment X/Open +@item _SC_NL_MSGMAX +Inquire about the parameter corresponding to @code{NL_MSGMAX}. + +@comment unistd.h +@comment X/Open +@item _SC_NL_NMAX +Inquire about the parameter corresponding to @code{NL_NMAX}. + +@comment unistd.h +@comment X/Open +@item _SC_NL_SETMAX +Inquire about the parameter corresponding to @code{NL_SETMAX}. + +@comment unistd.h +@comment X/Open +@item _SC_NL_TEXTMAX +Inquire about the parameter corresponding to @code{NL_TEXTMAX}. +@end vtable + +@node Examples of Sysconf +@subsection Examples of @code{sysconf} + +We recommend that you first test for a macro definition for the +parameter you are interested in, and call @code{sysconf} only if the +macro is not defined. 
For example, here is how to test whether job +control is supported: + +@smallexample +@group +int +have_job_control (void) +@{ +#ifdef _POSIX_JOB_CONTROL + return 1; +#else + int value = sysconf (_SC_JOB_CONTROL); + if (value < 0) + /* @r{If the system is that badly wedged,} + @r{there's no use trying to go on.} */ + fatal (strerror (errno)); + return value; +#endif +@} +@end group +@end smallexample + +Here is how to get the value of a numeric limit: + +@smallexample +int +get_child_max (void) +@{ +#ifdef CHILD_MAX + return CHILD_MAX; +#else + int value = sysconf (_SC_CHILD_MAX); + if (value < 0) + fatal (strerror (errno)); + return value; +#endif +@} +@end smallexample + +@node Minimums +@section Minimum Values for General Capacity Limits + +Here are the names for the POSIX minimum upper bounds for the system +limit parameters. The significance of these values is that you can +safely push to these limits without checking whether the particular +system you are using can go that far. + +@vtable @code +@comment limits.h +@comment POSIX.1 +@item _POSIX_AIO_LISTIO_MAX +The most restrictive limit permitted by POSIX for the maximum number of +I/O operations that can be specified in a list I/O call. The value of +this constant is @code{2}; thus you can add up to two new entries +to the list of outstanding operations. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_AIO_MAX +The most restrictive limit permitted by POSIX for the maximum number of +outstanding asynchronous I/O operations. The value of this constant is +@code{1}. So you cannot expect that you can issue more than one +operation and immediately continue with the normal work, receiving the +notifications asynchronously. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_ARG_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum combined length of the @var{argv} and @var{environ} +arguments that can be passed to the @code{exec} functions. +Its value is @code{4096}. 
+ +@comment limits.h +@comment POSIX.1 +@item _POSIX_CHILD_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum number of simultaneous processes per real user ID. Its +value is @code{6}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_NGROUPS_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum number of supplementary group IDs per process. Its +value is @code{0}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_OPEN_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum number of files that a single process can have open +simultaneously. Its value is @code{16}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_SSIZE_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum value that can be stored in an object of type +@code{ssize_t}. Its value is @code{32767}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_STREAM_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum number of streams that a single process can have open +simultaneously. Its value is @code{8}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_TZNAME_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the maximum length of a time zone name. Its value is @code{3}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_RE_DUP_MAX +The value of this macro is the most restrictive limit permitted by POSIX +for the numbers used in the @samp{\@{@var{min},@var{max}\@}} construct +in a regular expression. Its value is @code{255}. +@end vtable + +@node Limits for Files +@section Limits on File System Capacity + +The POSIX.1 standard specifies a number of parameters that describe the +limitations of the file system. It's possible for the system to have a +fixed, uniform limit for a parameter, but this isn't the usual case. 
On +most systems, it's possible for different file systems (and, for some +parameters, even different files) to have different maximum limits. For +example, this is very likely if you use NFS to mount some of the file +systems from other machines. + +@pindex limits.h +Each of the following macros is defined in @file{limits.h} only if the +system has a fixed, uniform limit for the parameter in question. If the +system allows different file systems or files to have different limits, +then the macro is undefined; use @code{pathconf} or @code{fpathconf} to +find out the limit that applies to a particular file. @xref{Pathconf}. + +Each parameter also has another macro, with a name starting with +@samp{_POSIX}, which gives the lowest value that the limit is allowed to +have on @emph{any} POSIX system. @xref{File Minimums}. + +@cindex limits, link count of files +@comment limits.h (optional) +@comment POSIX.1 +@deftypevr Macro int LINK_MAX +The uniform system limit (if any) for the number of names for a given +file. @xref{Hard Links}. +@end deftypevr + +@cindex limits, terminal input queue +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int MAX_CANON +The uniform system limit (if any) for the amount of text in a line of +input when input editing is enabled. @xref{Canonical or Not}. +@end deftypevr + +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int MAX_INPUT +The uniform system limit (if any) for the total number of characters +typed ahead as input. @xref{I/O Queues}. +@end deftypevr + +@cindex limits, file name length +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int NAME_MAX +The uniform system limit (if any) for the length of a file name component, not +including the terminating null character. + +@strong{Portability Note:} On some systems, @theglibc{} defines +@code{NAME_MAX}, but does not actually enforce this limit. 
+@end deftypevr + +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int PATH_MAX +The uniform system limit (if any) for the length of an entire file name (that +is, the argument given to system calls such as @code{open}), including the +terminating null character. + +@strong{Portability Note:} @Theglibc{} does not enforce this limit +even if @code{PATH_MAX} is defined. +@end deftypevr + +@cindex limits, pipe buffer size +@comment limits.h +@comment POSIX.1 +@deftypevr Macro int PIPE_BUF +The uniform system limit (if any) for the number of bytes that can be +written atomically to a pipe. If multiple processes are writing to the +same pipe simultaneously, output from different processes might be +interleaved in chunks of this size. @xref{Pipes and FIFOs}. +@end deftypevr + +These are alternative macro names for some of the same information. + +@comment dirent.h +@comment BSD +@deftypevr Macro int MAXNAMLEN +This is the BSD name for @code{NAME_MAX}. It is defined in +@file{dirent.h}. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int FILENAME_MAX +The value of this macro is an integer constant expression that +represents the maximum length of a file name string. It is defined in +@file{stdio.h}. + +Unlike @code{PATH_MAX}, this macro is defined even if there is no actual +limit imposed. In such a case, its value is typically a very large +number. @strong{This is always the case on @gnuhurdsystems{}.} + +@strong{Usage Note:} Don't use @code{FILENAME_MAX} as the size of an +array in which to store a file name! You can't possibly make an array +that big! Use dynamic allocation (@pxref{Memory Allocation}) instead. +@end deftypevr + +@node Options for Files +@section Optional Features in File Support + +POSIX defines certain system-specific options in the system calls for +operating on files. Some systems support these options and others do +not. 
Since these options are provided in the kernel, not in the +library, simply using @theglibc{} does not guarantee that any of these +features is supported; it depends on the system you are using. They can +also vary between file systems on a single machine. + +@pindex unistd.h +This section describes the macros you can test to determine whether a +particular option is supported on your machine. If a given macro is +defined in @file{unistd.h}, then its value says whether the +corresponding feature is supported. (A value of @code{-1} indicates no; +any other value indicates yes.) If the macro is undefined, it means +particular files may or may not support the feature. + +Since all the machines that support @theglibc{} also support NFS, +one can never make a general statement about whether all file systems +support the @code{_POSIX_CHOWN_RESTRICTED} and @code{_POSIX_NO_TRUNC} +features. So these names are never defined as macros in @theglibc{}. + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int _POSIX_CHOWN_RESTRICTED +If this option is in effect, the @code{chown} function is restricted so +that the only change permitted to nonprivileged processes is to change +the group owner of a file to either be the effective group ID of the +process, or one of its supplementary group IDs. @xref{File Owner}. +@end deftypevr + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int _POSIX_NO_TRUNC +If this option is in effect, file name components longer than +@code{NAME_MAX} generate an @code{ENAMETOOLONG} error. Otherwise, file +name components that are too long are silently truncated. +@end deftypevr + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro {unsigned char} _POSIX_VDISABLE +This option is only meaningful for files that are terminal devices. +If it is enabled, then handling for special control characters can +be disabled individually. @xref{Special Characters}. 
+@end deftypevr + +@pindex unistd.h +If one of these macros is undefined, that means that the option might be +in effect for some files and not for others. To inquire about a +particular file, call @code{pathconf} or @code{fpathconf}. +@xref{Pathconf}. + +@node File Minimums +@section Minimum Values for File System Limits + +Here are the names for the POSIX minimum upper bounds for some of the +above parameters. The significance of these values is that you can +safely push to these limits without checking whether the particular +system you are using can go that far. In most cases @gnusystems{} do not +have these strict limitations. The actual limit should be requested if +necessary. + +@vtable @code +@comment limits.h +@comment POSIX.1 +@item _POSIX_LINK_MAX +The most restrictive limit permitted by POSIX for the maximum value of a +file's link count. The value of this constant is @code{8}; thus, you +can always make up to eight names for a file without running into a +system limit. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_MAX_CANON +The most restrictive limit permitted by POSIX for the maximum number of +bytes in a canonical input line from a terminal device. The value of +this constant is @code{255}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_MAX_INPUT +The most restrictive limit permitted by POSIX for the maximum number of +bytes in a terminal device input queue (or typeahead buffer). +@xref{Input Modes}. The value of this constant is @code{255}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_NAME_MAX +The most restrictive limit permitted by POSIX for the maximum number of +bytes in a file name component. The value of this constant is +@code{14}. + +@comment limits.h +@comment POSIX.1 +@item _POSIX_PATH_MAX +The most restrictive limit permitted by POSIX for the maximum number of +bytes in a file name. The value of this constant is @code{256}. 
+ +@comment limits.h +@comment POSIX.1 +@item _POSIX_PIPE_BUF +The most restrictive limit permitted by POSIX for the maximum number of +bytes that can be written atomically to a pipe. The value of this +constant is @code{512}. + +@comment limits.h +@comment POSIX.1 +@item SYMLINK_MAX +Maximum number of bytes in a symbolic link. + +@comment limits.h +@comment POSIX.1 +@item POSIX_REC_INCR_XFER_SIZE +Recommended increment for file transfer sizes between the +@code{POSIX_REC_MIN_XFER_SIZE} and @code{POSIX_REC_MAX_XFER_SIZE} +values. + +@comment limits.h +@comment POSIX.1 +@item POSIX_REC_MAX_XFER_SIZE +Maximum recommended file transfer size. + +@comment limits.h +@comment POSIX.1 +@item POSIX_REC_MIN_XFER_SIZE +Minimum recommended file transfer size. + +@comment limits.h +@comment POSIX.1 +@item POSIX_REC_XFER_ALIGN +Recommended file transfer buffer alignment. +@end vtable + +@node Pathconf +@section Using @code{pathconf} + +When your machine allows different files to have different values for a +file system parameter, you can use the functions in this section to find +out the value that applies to any particular file. + +These functions and the associated constants for the @var{parameter} +argument are declared in the header file @file{unistd.h}. + +@comment unistd.h +@comment POSIX.1 +@deftypefun {long int} pathconf (const char *@var{filename}, int @var{parameter}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c When __statfs_link_max finds an ext* filesystem, it may read +@c /proc/mounts or similar as a mntent stream. +@c __statfs_chown_restricted may read from +@c /proc/sys/fs/xfs/restrict_chown as a file descriptor. +This function is used to inquire about the limits that apply to +the file named @var{filename}. + +The @var{parameter} argument should be one of the @samp{_PC_} constants +listed below. + +The normal return value from @code{pathconf} is the value you requested. 
+A value of @code{-1} is returned both if the implementation does not +impose a limit, and in case of an error. In the former case, +@code{errno} is not set, while in the latter case, @code{errno} is set +to indicate the cause of the problem. So the only way to use this +function robustly is to store @code{0} into @code{errno} just before +calling it. + +Besides the usual file name errors (@pxref{File Name Errors}), +the following error condition is defined for this function: + +@table @code +@item EINVAL +The value of @var{parameter} is invalid, or the implementation doesn't +support the @var{parameter} for the specific file. +@end table +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun {long int} fpathconf (int @var{filedes}, int @var{parameter}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c Same caveats as pathconf. +This is just like @code{pathconf} except that an open file descriptor +is used to specify the file for which information is requested, instead +of a file name. + +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINVAL +The value of @var{parameter} is invalid, or the implementation doesn't +support the @var{parameter} for the specific file. +@end table +@end deftypefun + +Here are the symbolic constants that you can use as the @var{parameter} +argument to @code{pathconf} and @code{fpathconf}. The values are all +integer constants. + +@vtable @code +@comment unistd.h +@comment POSIX.1 +@item _PC_LINK_MAX +Inquire about the value of @code{LINK_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_MAX_CANON +Inquire about the value of @code{MAX_CANON}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_MAX_INPUT +Inquire about the value of @code{MAX_INPUT}. 
+ +@comment unistd.h +@comment POSIX.1 +@item _PC_NAME_MAX +Inquire about the value of @code{NAME_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_PATH_MAX +Inquire about the value of @code{PATH_MAX}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_PIPE_BUF +Inquire about the value of @code{PIPE_BUF}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_CHOWN_RESTRICTED +Inquire about the value of @code{_POSIX_CHOWN_RESTRICTED}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_NO_TRUNC +Inquire about the value of @code{_POSIX_NO_TRUNC}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_VDISABLE +Inquire about the value of @code{_POSIX_VDISABLE}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_SYNC_IO +Inquire about the value of @code{_POSIX_SYNC_IO}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_ASYNC_IO +Inquire about the value of @code{_POSIX_ASYNC_IO}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_PRIO_IO +Inquire about the value of @code{_POSIX_PRIO_IO}. + +@comment unistd.h +@comment LFS +@item _PC_FILESIZEBITS +Inquire about the availability of large files on the filesystem. + +@comment unistd.h +@comment POSIX.1 +@item _PC_REC_INCR_XFER_SIZE +Inquire about the value of @code{POSIX_REC_INCR_XFER_SIZE}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_REC_MAX_XFER_SIZE +Inquire about the value of @code{POSIX_REC_MAX_XFER_SIZE}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_REC_MIN_XFER_SIZE +Inquire about the value of @code{POSIX_REC_MIN_XFER_SIZE}. + +@comment unistd.h +@comment POSIX.1 +@item _PC_REC_XFER_ALIGN +Inquire about the value of @code{POSIX_REC_XFER_ALIGN}. +@end vtable + +@strong{Portability Note:} On some systems, @theglibc{} does not +enforce @code{_PC_NAME_MAX} or @code{_PC_PATH_MAX} limits. 
+ +@node Utility Limits +@section Utility Program Capacity Limits + +The POSIX.2 standard specifies certain system limits that you can access +through @code{sysconf} that apply to utility behavior rather than the +behavior of the library or the operating system. + +@Theglibc{} defines macros for these limits, and @code{sysconf} +returns values for them if you ask; but these values convey no +meaningful information. They are simply the smallest values that +POSIX.2 permits. + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int BC_BASE_MAX +The largest value of @code{obase} that the @code{bc} utility is +guaranteed to support. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int BC_DIM_MAX +The largest number of elements in one array that the @code{bc} utility +is guaranteed to support. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int BC_SCALE_MAX +The largest value of @code{scale} that the @code{bc} utility is +guaranteed to support. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int BC_STRING_MAX +The largest number of characters in one string constant that the +@code{bc} utility is guaranteed to support. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int COLL_WEIGHTS_MAX +The largest number of weights that can necessarily be used in defining +the collating sequence for a locale. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int EXPR_NEST_MAX +The maximum number of expressions that can be nested within parentheses +by the @code{expr} utility. +@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int LINE_MAX +The largest text line that the text-oriented POSIX.2 utilities can +support. (If you are using the GNU versions of these utilities, then +there is no actual limit except that imposed by the available virtual +memory, but there is no way that the library can tell you this.) 
+@end deftypevr + +@comment limits.h +@comment POSIX.2 +@deftypevr Macro int EQUIV_CLASS_MAX +The maximum number of weights that can be assigned to an entry of the +@code{LC_COLLATE} category @samp{order} keyword in a locale definition. +@Theglibc{} does not presently support locale definitions. +@end deftypevr + +@node Utility Minimums +@section Minimum Values for Utility Limits + +@vtable @code +@comment limits.h +@comment POSIX.2 +@item _POSIX2_BC_BASE_MAX +The most restrictive limit permitted by POSIX.2 for the maximum value of +@code{obase} in the @code{bc} utility. Its value is @code{99}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_BC_DIM_MAX +The most restrictive limit permitted by POSIX.2 for the maximum size of +an array in the @code{bc} utility. Its value is @code{2048}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_BC_SCALE_MAX +The most restrictive limit permitted by POSIX.2 for the maximum value of +@code{scale} in the @code{bc} utility. Its value is @code{99}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_BC_STRING_MAX +The most restrictive limit permitted by POSIX.2 for the maximum size of +a string constant in the @code{bc} utility. Its value is @code{1000}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_COLL_WEIGHTS_MAX +The most restrictive limit permitted by POSIX.2 for the maximum number +of weights that can necessarily be used in defining the collating +sequence for a locale. Its value is @code{2}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_EXPR_NEST_MAX +The most restrictive limit permitted by POSIX.2 for the maximum number +of expressions nested within parentheses when using the @code{expr} utility. +Its value is @code{32}. + +@comment limits.h +@comment POSIX.2 +@item _POSIX2_LINE_MAX +The most restrictive limit permitted by POSIX.2 for the maximum size of +a text line that the text utilities can handle. Its value is +@code{2048}. 
+ +@comment limits.h +@comment POSIX.2 +@item _POSIX2_EQUIV_CLASS_MAX +The most restrictive limit permitted by POSIX.2 for the maximum number +of weights that can be assigned to an entry of the @code{LC_COLLATE} +category @samp{order} keyword in a locale definition. Its value is +@code{2}. @Theglibc{} does not presently support locale +definitions. +@end vtable + +@node String Parameters +@section String-Valued Parameters + +POSIX.2 defines a way to get string-valued parameters from the operating +system with the function @code{confstr}: + +@comment unistd.h +@comment POSIX.2 +@deftypefun size_t confstr (int @var{parameter}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function reads the value of a string-valued system parameter, +storing the string into @var{len} bytes of memory space starting at +@var{buf}. The @var{parameter} argument should be one of the +@samp{_CS_} symbols listed below. + +The normal return value from @code{confstr} is the length of the string +value that you asked for. If you supply a null pointer for @var{buf}, +then @code{confstr} does not try to store the string; it just returns +its length. A value of @code{0} indicates an error. + +If the string you asked for is too long for the buffer (that is, longer +than @code{@var{len} - 1}), then @code{confstr} stores just that much +(leaving room for the terminating null character). You can tell that +this has happened because @code{confstr} returns a value greater than or +equal to @var{len}. + +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EINVAL +The value of the @var{parameter} is invalid. +@end table +@end deftypefun + +Here are the parameters you can read with @code{confstr}: + +@vtable @code +@comment unistd.h +@comment POSIX.2 +@item _CS_PATH +This parameter's value is the recommended default path for searching for +executable files. 
This is the path that a user has by default just +after logging in. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS_CFLAGS +The returned string specifies which additional flags must be given to +the C compiler if a source is compiled using the +@code{_LARGEFILE_SOURCE} feature select macro; @pxref{Feature Test Macros}. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS_LDFLAGS +The returned string specifies which additional flags must be given to +the linker if a source is compiled using the +@code{_LARGEFILE_SOURCE} feature select macro; @pxref{Feature Test Macros}. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS_LIBS +The returned string specifies which additional libraries must be linked +to the application if a source is compiled using the +@code{_LARGEFILE_SOURCE} feature select macro; @pxref{Feature Test Macros}. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS_LINTFLAGS +The returned string specifies which additional flags must be given to +the lint tool if a source is compiled using the +@code{_LARGEFILE_SOURCE} feature select macro; @pxref{Feature Test Macros}. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS64_CFLAGS +The returned string specifies which additional flags must be given to +the C compiler if a source is compiled using the +@code{_LARGEFILE64_SOURCE} feature select macro; @pxref{Feature Test Macros}. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS64_LDFLAGS +The returned string specifies which additional flags must be given to +the linker if a source is compiled using the +@code{_LARGEFILE64_SOURCE} feature select macro; @pxref{Feature Test Macros}. + +@comment unistd.h +@comment Unix98 +@item _CS_LFS64_LIBS +The returned string specifies which additional libraries must be linked +to the application if a source is compiled using the +@code{_LARGEFILE64_SOURCE} feature select macro; @pxref{Feature Test Macros}. 
+ +@comment unistd.h +@comment Unix98 +@item _CS_LFS64_LINTFLAGS +The returned string specifies which additional flags must be given to +the lint tool if a source is compiled using the +@code{_LARGEFILE64_SOURCE} feature select macro; @pxref{Feature Test Macros}. +@end vtable + +The way to use @code{confstr} without any arbitrary limit on string size +is to call it twice: first call it to get the length, allocate the +buffer accordingly, and then call @code{confstr} again to fill the +buffer, like this: + +@smallexample +@group +char * +get_default_path (void) +@{ + size_t len = confstr (_CS_PATH, NULL, 0); + char *buffer = (char *) xmalloc (len); + + if (confstr (_CS_PATH, buffer, len) == 0) + @{ + free (buffer); + return NULL; + @} + + return buffer; +@} +@end group +@end smallexample diff --git a/REORG.TODO/manual/contrib.texi b/REORG.TODO/manual/contrib.texi new file mode 100644 index 0000000000..cc52f60779 --- /dev/null +++ b/REORG.TODO/manual/contrib.texi @@ -0,0 +1,449 @@ +@node Contributors, Free Manuals, Platform, Top +@c %MENU% Who wrote what parts of the GNU C Library +@appendix Contributors to @theglibc{} + +@Theglibc{} project would like to thank its many contributors. +Without them the project would not have been nearly as successful as +it has been. Any omissions in this list are accidental. Feel free to +file a bug in bugzilla if you have been left out or some of your +contributions are not listed. Please keep this list in alphabetical +order. + +@itemize @bullet + +@item +Nick Alcock for contributing fixes to allow @theglibc{} to be built with the +stack smashing protector enabled. + +@item +Ryan S. Arnold for his improvements for Linux on PowerPC and his direction +as FSF Project Steward for @theglibc{}. + +@item +Miles Bader for writing the @code{argp} argument-parsing package, and the +@code{argz}/@code{envz} interfaces. + +@item +Jeff Bailey for his maintainership of the HPPA architecture. + +@item +Petr Baudis for bug fixes and testing. 
+ +@item +Stephen R. van den Berg for contributing a highly-optimized +@code{strstr} function. + +@item +Ondrej Bilka for contributing optimized string routines for x64 and various +fixes. + +@item +Eric Blake for adding O(n) implementations of @code{memmem}, +@code{strstr} and @code{strcasestr}. + +@item +Philip Blundell for the ports to Linux/ARM +(@code{arm-@var{ANYTHING}-linuxaout}) and ARM standalone +(@code{arm-@var{ANYTHING}-none}), as well as for parts of the IPv6 +support code. + +@item +Per Bothner for the implementation of the @code{libio} library which +is used to implement @code{stdio} functions. + +@item +Mark Brown for his direction as part of @theglibc{} steering +committee. + +@item +Thomas Bushnell for his contributions to Hurd. + +@item +Wilco Dijkstra for various fixes. + +@item +Liubov Dmitrieva for optimized string and math functions on x86-64 and +x86. + +@item +Ulrich Drepper for his many contributions in almost all parts of +@theglibc{}, including: +@itemize @bullet +@item internationalization support, including the @code{locale} and +@code{localedef} utilities. +@item Linux i386/ELF support +@item the @code{hsearch} and @code{drand48} families of functions, +reentrant @samp{@dots{}@code{_r}} versions of the @code{random} +family; System V shared memory and IPC support code +@item several highly-optimized string functions for i@var{x}86 +processors +@item many math functions +@item the character conversion functions (@code{iconv}) +@item the @code{ftw} and @code{nftw} functions +@item the floating-point printing function used by @code{printf} and friends +and the floating-point reading function used by @code{scanf}, +@code{strtod} and friends +@item the @code{catgets} support and the entire suite of multi-byte +and wide-character support functions (@file{wctype.h}, @file{wchar.h}, etc.). +@item versioning of objects on the symbol level +@end itemize + +@item +Wilco Dijkstra for various fixes. 
+ +@item +Richard Earnshaw for continued support and fixes to the various ARM +machine files. + +@item +Paul Eggert for the @code{mktime} function and for his direction as +part of @theglibc{} steering committee. + +@item +Steve Ellcey for various fixes. + +@item +Tulio Magno Quites Machado Filho for adding a new class of installed +headers for low-level platform-specific functionality and one such for +PowerPC and various fixes. + +@item +Mike Frysinger for his maintaining of the IA64 architecture and for +testing and bug fixing. + +@item +Martin Galvan for contributing gdb pretty printer support to glibc and adding +an initial set of pretty printers for structures in the POSIX Threads library. + +@item +Michael Glad for the DES encryption function @code{crypt} and related +functions. + +@item +Wolfram Gloger for contributing the memory allocation functions +@code{malloc}, @code{realloc} and @code{free} and related +code. + +@item +Torbj@"orn Granlund for fast implementations of many of the string +functions (@code{memcpy}, @code{strlen}, etc.). + +@item +Michael J. Haertel for writing the merge sort function @code{qsort} +and malloc checking functions like @code{mcheck}. + +@item +Bruno Haible for his improvements to the @code{iconv} and locale +implementations. + +@item +Richard Henderson for the port to Linux on Alpha +(@code{alpha-@var{anything}-linux}). + +@item +David Holsgrove for the port to Linux on MicroBlaze. + +@item +Daniel Jacobowitz for various fixes and enhancements. + +@item +Andreas Jaeger for the port to Linux on x86-64 +(@code{x86_64-@var{anything}-linux}) and his work on Linux for MIPS +(@code{mips-@var{anything}-linux}), implementing the @file{ldconfig} +program, providing a test suite for the math library and for his +direction as part of @theglibc{} steering committee. + +@item +Aurelien Jarno for various fixes. + +@item +Rical Jasan for contributing various fixes in @theglibc{} manual. 
+ +@item +Jakub Jelinek for implementing a number of checking functions and for +his direction as part of @theglibc{} steering committee. + +@item +Geoffrey Keating for the port to Linux on PowerPC +(@code{powerpc-@var{anything}-linux}). + +@item +Brendan Kehoe for contributing the port to the MIPS DECStation running +Ultrix 4 (@code{mips-dec-ultrix4}) and the port to the DEC Alpha +running OSF/1 (@code{alpha-dec-osf1}). + +@item +Mark Kettenis for implementing the @code{utmpx} interface and a utmp +daemon, and for a Hesiod NSS module. + +@item +Andi Kleen for implementing pthreads lock elision with TSX. + +@item +Kazumoto Kojima for the port of the Mach and Hurd code to the MIPS +architecture (@code{mips-@var{anything}-gnu}) and for his work on the +SH architecture. + +@item +Maxim Kuvyrkov for various fixes. + +@item +Andreas Krebbel for his work on Linux for s390 and s390x. + +@item +Thorsten Kukuk for providing an implementation for NIS (YP) and NIS+, +securelevel 0, 1 and 2 and for the implementation for a caching daemon +for NSS (@file{nscd}). + +@item +Jeff Law for various fixes. + +@item +Doug Lea for contributing the memory allocation functions +@code{malloc}, @code{realloc} and @code{free} and related +code. + +@item +Chris Leonard for various fixes and enhancements to localedata. + +@item +Stefan Liebler for various fixes. + +@item +Hongjiu Lu for providing the support for a Linux 32-bit runtime +environment under x86-64 (x32), for porting to Linux on IA64, for +improved string functions, a framework for testing IFUNC +implementations, and many bug fixes. + +@item +Luis Machado for optimized functions on PowerPC. + +@item +David J. MacKenzie for his contribution to the @code{getopt} +function and writing the @file{tar.h} header. + +@item +Greg McGary for adding runtime support for bounds checking. 
+ +@item +Roland McGrath for writing most of @theglibc{} originally, for his +work on the Hurd port, his direction as part of @theglibc{} steering +committee and as FSF Project Steward for @theglibc{}, and for many bug +fixes and reviewing of contributions. + +@item +Allan McRae for various fixes. + +@item +Jason Merrill for the port to the Sequent Symmetry running Dynix +version 3 (@code{i386-sequent-bsd}). + +@item +Chris Metcalf for the port to Linux/Tile +(@code{tilegx-@var{anything}-linux} and +@code{tilepro-@var{anything}-linux}). + +@item +David Miller for contributing the port to Linux/Sparc +(@code{sparc*-@var{anything}-linux}). + +@item +Alan Modra for his improvements for Linux on PowerPC. + +@item +David Mosberger-Tang for contributing the port to Linux/Alpha +(@code{alpha-@var{anything}-linux}). + +@item +Stephen Moshier for implementing some 128-bit long double format math +functions. + +@item +Stephen Munroe for his port to Linux on PowerPC64 +(@code{powerpc64-@var{anything}-linux}) and for adding optimized +implementations for PowerPC. + +@item +Paul E. Murphy for various fixes on PowerPC. + +@item +Joseph S. Myers for numerous bug fixes for the libm functions, for +his maintainership of the ARM and MIPS architectures, improving +cross-compilation and cross-testing of @theglibc{}, expanded +coverage of conformtest, merging the ports/ subdirectory +into the @glibcadj{} main repository and his direction as FSF Project +Steward for @theglibc{}. + +@item +Szabolcs Nagy for various fixes. + +@item +Will Newton for contributing some optimized string functions and pointer +encryption support for ARM and various fixes. + +@item +Carlos O'Donell for his maintainership of the HPPA architecture, for +maintaining @theglibc{} web pages and wiki, for his direction as FSF +Project Steward for @theglibc{} and various bug fixes. 
+ +@item +Alexandre Oliva for adding TLS descriptors for LD and GD on x86 and +x86-64, for the am33 port, for completing the MIPS n64/n32/o32 multilib +port, for thread-safety, async-signal safety and async-cancellation +safety documentation in the manual, for his direction as FSF Project +Maintainer and for various fixes. + +@item +Paul Pluzhnikov for various fixes. + +@item +Marek Polacek for various fixes. + +@item +Siddhesh Poyarekar for various fixes, an implementation of a framework for +performance benchmarking of functions and implementing the tunables +infrastructure. + +@item +Tom Quinn for contributing the startup code to support SunOS shared +libraries and the port to SGI machines running Irix 4 +(@code{mips-sgi-irix4}). + +@item +Torvald Riegel for the implementation of new algorithms for semaphores, +pthread_rwlock and condition variables. + +@item +Maciej W. Rozycki for various fixes. + +@item +Pravin Satpute for writing sorting rules for some Indian languages. + +@item +Douglas C. Schmidt for writing the quick sort function used as a +fallback by @code{qsort}. + +@item +Will Schmidt for optimized string functions on PowerPC. + +@item +Andreas Schwab for the port to Linux/m68k +(@code{m68k-@var{anything}-linux}) and for his direction as part of +@theglibc{} steering committee. + +@item +Martin Schwidefsky for porting to Linux on s390 +(@code{s390-@var{anything}-linux}) and s390x +(@code{s390x-@var{anything}-linux}). + +@item +Thomas Schwinge for his contribution to Hurd and the SH architecture. + +@item +Andrew Senkevich for contributing vector math function implementations for x86. + +@item +Carlos Eduardo Seo for optimized functions on PowerPC. + +@item +Marcus Shawcroft for contributing the AArch64 port. + +@item +Franz Sirl for various fixes. + +@item +Jes Sorensen for porting to Linux on IA64 (@code{ia64-@var{anything}-linux}). + +@item +Rajalakshmi Srinivasaraghavan for various fixes and optimizations on PowerPC. 
+ +@item +Richard Stallman for his contribution to the @code{getopt} function. + +@item +Alfred M. Szmidt for various fixes. + +@item +Ian Lance Taylor for contributing the port to the MIPS DECStation +running Ultrix 4 (@code{mips-dec-ultrix4}). + +@item +Samuel Thibault for improving the Hurd port. + +@item +Tim Waugh for the implementation of the POSIX.2 @code{wordexp} +function family. + +@item +Zack Weinberg for the @code{explicit_bzero} implementation and for various +fixes. + +@item +Eric Youngdale for implementing versioning of objects on the symbol level. + +@item +Adhemerval Zanella for optimized functions on PowerPC and various fixes. + +@end itemize + +Some code in @theglibc{} comes from other projects and might be under +a different license: + +@itemize @bullet +@item +The timezone support code is derived from the public-domain timezone +package by Arthur David Olson and his many contributors. + +@item +Some of the support code for Mach is taken from Mach 3.0 by CMU; +the file @file{if_ppp.h} is also copyright by CMU, but under a different license; +see the file @file{LICENSES} for the text of the licenses. + +@item +The random number generation functions @code{random}, @code{srandom}, +@code{setstate} and @code{initstate}, which are also the basis for the +@code{rand} and @code{srand} functions, were written by Earl T. Cohen +for the University of California at Berkeley and are copyrighted by the +Regents of the University of California. They have undergone minor +changes to fit into @theglibc{} and to fit the @w{ISO C} standard, +but the functional code is Berkeley's.@refill + +@item +The Internet-related code (most of the @file{inet} subdirectory) and +several other miscellaneous functions and header files have been +included from 4.4 BSD with little or no modification. The copying +permission notice for this code can be found in the file @file{LICENSES} +in the source distribution. 
+ +@item +The @code{getaddrinfo} and @code{getnameinfo} functions and supporting +code were written by Craig Metz; see the file @file{LICENSES} for +details on their licensing. + +@item +The DNS resolver code is taken directly from BIND 4.9.5, which +includes copyrighted code from UC Berkeley and from Digital Equipment +Corporation. See the file @file{LICENSES} for the text of the DEC license. + +@item +The code to support Sun RPC is taken verbatim from Sun's +@w{@sc{rpcsrc-4.0}} distribution; see the file @file{LICENSES} for the +text of the license. + +@item +The math functions are taken from @code{fdlibm-5.1} by Sun +Microsystems, as modified by J.T. Conklin, Ian Lance Taylor, +Ulrich Drepper, Andreas Schwab, and Roland McGrath. + +@item +Many of the IEEE 64-bit double precision math functions +(in the @file{sysdeps/ieee754/dbl-64} subdirectory) come +from the IBM Accurate Mathematical Library, contributed by IBM. + +@item +Many of the IA64 math functions are taken from a collection of +``Highly Optimized Mathematical Functions for Itanium'' that Intel +makes available under a free license; see the file @file{LICENSES} for +details. + +@end itemize diff --git a/REORG.TODO/manual/creature.texi b/REORG.TODO/manual/creature.texi new file mode 100644 index 0000000000..23218bbac3 --- /dev/null +++ b/REORG.TODO/manual/creature.texi @@ -0,0 +1,249 @@ +@node Feature Test Macros +@subsection Feature Test Macros + +@cindex feature test macros +The exact set of features available when you compile a source file +is controlled by which @dfn{feature test macros} you define. + +If you compile your programs using @samp{gcc -ansi}, you get only the +@w{ISO C} library features, unless you explicitly request additional +features by defining one or more of the feature macros. 
+@xref{Invoking GCC,, GNU CC Command Options, gcc.info, The GNU CC Manual}, +for more information about GCC options.@refill + +You should define these macros by using @samp{#define} preprocessor +directives at the top of your source code files. These directives +@emph{must} come before any @code{#include} of a system header file. It +is best to make them the very first thing in the file, preceded only by +comments. You could also use the @samp{-D} option to GCC, but it's +better if you make the source files indicate their own meaning in a +self-contained way. + +This system exists to allow the library to conform to multiple standards. +Although the different standards are often described as supersets of each +other, they are usually incompatible because larger standards require +functions with names that smaller ones reserve to the user program. This +is not mere pedantry --- it has been a problem in practice. For instance, +some non-GNU programs define functions named @code{getline} that have +nothing to do with this library's @code{getline}. They would not be +compilable if all features were enabled indiscriminately. + +This should not be used to verify that a program conforms to a limited +standard. It is insufficient for this purpose, as it will not protect you +from including header files outside the standard, or relying on semantics +undefined within the standard. + +@comment (none) +@comment POSIX.1 +@defvr Macro _POSIX_SOURCE +If you define this macro, then the functionality from the POSIX.1 +standard (IEEE Standard 1003.1) is available, as well as all of the +@w{ISO C} facilities. + +The state of @code{_POSIX_SOURCE} is irrelevant if you define the +macro @code{_POSIX_C_SOURCE} to a positive integer. +@end defvr + +@comment (none) +@comment POSIX.2 +@defvr Macro _POSIX_C_SOURCE +Define this macro to a positive integer to control which POSIX +functionality is made available. The greater the value of this macro, +the more functionality is made available. 
+ +If you define this macro to a value greater than or equal to @code{1}, +then the functionality from the 1990 edition of the POSIX.1 standard +(IEEE Standard 1003.1-1990) is made available. + +If you define this macro to a value greater than or equal to @code{2}, +then the functionality from the 1992 edition of the POSIX.2 standard +(IEEE Standard 1003.2-1992) is made available. + +If you define this macro to a value greater than or equal to @code{199309L}, +then the functionality from the 1993 edition of the POSIX.1b standard +(IEEE Standard 1003.1b-1993) is made available. + +Greater values for @code{_POSIX_C_SOURCE} will enable future extensions. +The POSIX standards process will define these values as necessary, and +@theglibc{} should support them some time after they become standardized. +The 1996 edition of POSIX.1 (ISO/IEC 9945-1: 1996) states that +if you define @code{_POSIX_C_SOURCE} to a value greater than +or equal to @code{199506L}, then the functionality from the 1996 +edition is made available. +@end defvr + +@comment (none) +@comment X/Open +@defvr Macro _XOPEN_SOURCE +@comment (none) +@comment X/Open +@defvrx Macro _XOPEN_SOURCE_EXTENDED +If you define this macro, functionality described in the X/Open +Portability Guide is included. This is a superset of the POSIX.1 and +POSIX.2 functionality and in fact @code{_POSIX_SOURCE} and +@code{_POSIX_C_SOURCE} are automatically defined. + +As the unification of all Unices, functionality only available in +BSD and SVID is also included. + +If the macro @code{_XOPEN_SOURCE_EXTENDED} is also defined, even more +functionality is available. The extra functions will make all functions +available which are necessary for the X/Open Unix brand. + +If the macro @code{_XOPEN_SOURCE} has the value @math{500} this includes +all functionality described so far plus some new definitions from the +Single Unix Specification, @w{version 2}. 
+@end defvr + +@comment (NONE) +@comment X/Open +@defvr Macro _LARGEFILE_SOURCE +If this macro is defined some extra functions are available which +rectify a few shortcomings in all previous standards. Specifically, +the functions @code{fseeko} and @code{ftello} are available. Without +these functions the difference between the @w{ISO C} interface +(@code{fseek}, @code{ftell}) and the low-level POSIX interface +(@code{lseek}) would lead to problems. + +This macro was introduced as part of the Large File Support extension (LFS). +@end defvr + +@comment (NONE) +@comment X/Open +@defvr Macro _LARGEFILE64_SOURCE +If you define this macro an additional set of functions is made available +which enables @w{32 bit} systems to use files of sizes beyond +the usual limit of 2GB. This interface is not available if the system +does not support files that large. On systems where the natural file +size limit is greater than 2GB (i.e., on @w{64 bit} systems) the new +functions are identical to the replaced functions. + +The new functionality is made available by a new set of types and +functions which replace the existing ones. The names of these new objects +contain @code{64} to indicate the intention, e.g., @code{off_t} +vs. @code{off64_t} and @code{fseeko} vs. @code{fseeko64}. + +This macro was introduced as part of the Large File Support extension +(LFS). It is a transition interface for the period when @w{64 bit} +offsets are not generally used (see @code{_FILE_OFFSET_BITS}). +@end defvr + +@comment (NONE) +@comment X/Open +@defvr Macro _FILE_OFFSET_BITS +This macro determines which file system interface shall be used, one +replacing the other. Whereas @code{_LARGEFILE64_SOURCE} makes the @w{64 +bit} interface available as an additional interface, +@code{_FILE_OFFSET_BITS} allows the @w{64 bit} interface to +replace the old interface. + +If @code{_FILE_OFFSET_BITS} is undefined, or if it is defined to the +value @code{32}, nothing changes. 
The @w{32 bit} interface is used and +types like @code{off_t} have a size of @w{32 bits} on @w{32 bit} +systems. + +If the macro is defined to the value @code{64}, the large file interface +replaces the old interface. I.e., the functions are not made available +under different names (as they are with @code{_LARGEFILE64_SOURCE}). +Instead the old function names now reference the new functions, e.g., a +call to @code{fseeko} now indeed calls @code{fseeko64}. + +This macro should only be selected if the system provides mechanisms for +handling large files. On @w{64 bit} systems this macro has no effect +since the @code{*64} functions are identical to the normal functions. + +This macro was introduced as part of the Large File Support extension +(LFS). +@end defvr + +@comment (none) +@comment GNU +@defvr Macro _ISOC99_SOURCE +Until the revised @w{ISO C} standard is widely adopted the new features +are not automatically enabled. @Theglibc{} nevertheless has a complete +implementation of the new standard and to enable the new features the +macro @code{_ISOC99_SOURCE} should be defined. +@end defvr + +@comment (none) +@comment ISO +@defvr Macro __STDC_WANT_LIB_EXT2__ +If you define this macro to the value @code{1}, features from ISO/IEC +TR 24731-2:2010 (Dynamic Allocation Functions) are enabled. Only some +of the features from this TR are supported by @theglibc{}. +@end defvr + +@comment (none) +@comment ISO +@defvr Macro __STDC_WANT_IEC_60559_BFP_EXT__ +If you define this macro, features from ISO/IEC TS 18661-1:2014 +(Floating-point extensions for C: Binary floating-point arithmetic) +are enabled. Only some of the features from this TS are supported by +@theglibc{}. +@end defvr + +@comment (none) +@comment ISO +@defvr Macro __STDC_WANT_IEC_60559_FUNCS_EXT__ +If you define this macro, features from ISO/IEC TS 18661-4:2015 +(Floating-point extensions for C: Supplementary functions) are +enabled. Only some of the features from this TS are supported by +@theglibc{}. 
+@end defvr + +@comment (none) +@comment ISO +@defvr Macro __STDC_WANT_IEC_60559_TYPES_EXT__ +If you define this macro, features from ISO/IEC TS 18661-3:2015 +(Floating-point extensions for C: Interchange and extended types) are +enabled. Only some of the features from this TS are supported by +@theglibc{}. +@end defvr + +@comment (none) +@comment GNU +@defvr Macro _GNU_SOURCE +If you define this macro, everything is included: @w{ISO C89}, @w{ISO +C99}, POSIX.1, POSIX.2, BSD, SVID, X/Open, LFS, and GNU extensions. In +the cases where POSIX.1 conflicts with BSD, the POSIX definitions take +precedence. +@end defvr + +@comment (none) +@comment GNU +@defvr Macro _DEFAULT_SOURCE +If you define this macro, most features are included apart from +X/Open, LFS and GNU extensions: the effect is to enable features from +the 2008 edition of POSIX, as well as certain BSD and SVID features +without a separate feature test macro to control them. Defining this +macro, on its own and without using compiler options such as +@option{-ansi} or @option{-std=c99}, has the same effect as not +defining any feature test macros; defining it together with other +feature test macros, or when options such as @option{-ansi} are used, +enables those features even when the other options would otherwise +cause them to be disabled. +@end defvr + +@comment (none) +@comment GNU +@defvr Macro _REENTRANT +@defvrx Macro _THREAD_SAFE +These macros are obsolete. They have the same effect as defining +@code{_POSIX_C_SOURCE} with the value @code{199506L}. + +Some very old C libraries required one of these macros to be defined +for basic functionality (e.g.@: @code{getchar}) to be thread-safe. +@end defvr + +We recommend you use @code{_GNU_SOURCE} in new programs. If you don't +specify the @samp{-ansi} option to GCC, or other conformance options +such as @option{-std=c99}, and don't define any of these macros +explicitly, the effect is the same as defining @code{_DEFAULT_SOURCE} +to 1. 
+ +When you define a feature test macro to request a larger class of features, +it is harmless to define in addition a feature test macro for a subset of +those features. For example, if you define @code{_POSIX_C_SOURCE}, then +defining @code{_POSIX_SOURCE} as well has no effect. Likewise, if you +define @code{_GNU_SOURCE}, then defining either @code{_POSIX_SOURCE} or +@code{_POSIX_C_SOURCE} as well has no effect. diff --git a/REORG.TODO/manual/crypt.texi b/REORG.TODO/manual/crypt.texi new file mode 100644 index 0000000000..59e42652ab --- /dev/null +++ b/REORG.TODO/manual/crypt.texi @@ -0,0 +1,542 @@ +@c This node must have no pointers. +@node Cryptographic Functions +@c @node Cryptographic Functions, Debugging Support, System Configuration, Top +@chapter DES Encryption and Password Handling +@c %MENU% DES encryption and password handling + +On many systems, it is unnecessary to have any kind of user +authentication; for instance, a workstation which is not connected to a +network probably does not need any user authentication, because to use +the machine an intruder must have physical access. + +Sometimes, however, it is necessary to be sure that a user is authorized +to use some service a machine provides---for instance, to log in as a +particular user id (@pxref{Users and Groups}). One traditional way of +doing this is for each user to choose a secret @dfn{password}; then, the +system can ask someone claiming to be a user what the user's password +is, and if the person gives the correct password then the system can +grant the appropriate privileges. + +If all the passwords are just stored in a file somewhere, then this file +has to be very carefully protected. To avoid this, passwords are run +through a @dfn{one-way function}, a function which makes it difficult to +work out what its input was by looking at its output, before storing in +the file. 
+ +@Theglibc{} provides a one-way function that is compatible with +the behavior of the @code{crypt} function introduced in FreeBSD 2.0. +It supports two one-way algorithms: one based on the MD5 +message-digest algorithm that is compatible with modern BSD systems, +and the other based on the Data Encryption Standard (DES) that is +compatible with Unix systems. + +@vindex AUTH_DES +@cindex FIPS 140-2 +It also provides support for Secure RPC, and some library functions that +can be used to perform normal DES encryption. The @code{AUTH_DES} +authentication flavor in Secure RPC, as provided by @theglibc{}, +uses DES and does not comply with FIPS 140-2 nor does any other use of DES +within @theglibc{}. It is recommended that Secure RPC should not be used +for systems that need to comply with FIPS 140-2 since all flavors of +encrypted authentication use normal DES. + +@menu +* Legal Problems:: This software can get you locked up, or worse. +* getpass:: Prompting the user for a password. +* crypt:: A one-way function for passwords. +* DES Encryption:: Routines for DES encryption. +* Unpredictable Bytes:: Randomness for cryptography purposes. +@end menu + +@node Legal Problems +@section Legal Problems + +Because of the continuously changing state of the law, it's not possible +to provide a definitive survey of the laws affecting cryptography. +Instead, this section warns you of some of the known trouble spots; this +may help you when you try to find out what the laws of your country are. + +Some countries require that you have a license to use, possess, or import +cryptography. These countries are believed to include Byelorussia, +Burma, India, Indonesia, Israel, Kazakhstan, Pakistan, Russia, and Saudi +Arabia. + +Some countries restrict the transmission of encrypted messages by radio; +some telecommunications carriers restrict the transmission of encrypted +messages over their network. + +Many countries have some form of export control for encryption software. 
+The Wassenaar Arrangement is a multilateral agreement between 33 +countries (Argentina, Australia, Austria, Belgium, Bulgaria, Canada, the +Czech Republic, Denmark, Finland, France, Germany, Greece, Hungary, +Ireland, Italy, Japan, Luxembourg, the Netherlands, New Zealand, Norway, +Poland, Portugal, the Republic of Korea, Romania, the Russian +Federation, the Slovak Republic, Spain, Sweden, Switzerland, Turkey, +Ukraine, the United Kingdom and the United States) which restricts some +kinds of encryption exports. Different countries apply the arrangement +in different ways; some do not allow the exception for certain kinds of +``public domain'' software (which would include this library), some +only restrict the export of software in tangible form, and others impose +significant additional restrictions. + +The United States has additional rules. This software would generally +be exportable under 15 CFR 740.13(e), which permits exports of +``encryption source code'' which is ``publicly available'' and which is +``not subject to an express agreement for the payment of a licensing fee or +royalty for commercial production or sale of any product developed with +the source code'' to most countries. + +The rules in this area are continuously changing. If you know of any +information in this manual that is out-of-date, please report it to +the bug database. @xref{Reporting Bugs}. + +@node getpass +@section Reading Passwords + +When reading in a password, it is desirable to avoid displaying it on +the screen, to help keep it secret. The following function handles this +in a convenient way. + +@comment unistd.h +@comment BSD +@deftypefun {char *} getpass (const char *@var{prompt}) +@safety{@prelim{}@mtunsafe{@mtasuterm{}}@asunsafe{@ascuheap{} @asulock{} @asucorrupt{}}@acunsafe{@acuterm{} @aculock{} @acucorrupt{}}} +@c This function will attempt to create a stream for terminal I/O, but +@c will fallback to stdio/stderr. 
It attempts to change the terminal +@c mode in a thread-unsafe way, write out the prompt, read the password, +@c then restore the terminal mode. It has a cleanup to close the stream +@c in case of (synchronous) cancellation, but not to restore the +@c terminal mode. + +@code{getpass} outputs @var{prompt}, then reads a string in from the +terminal without echoing it. It tries to connect to the real terminal, +@file{/dev/tty}, if possible, to encourage users not to put plaintext +passwords in files; otherwise, it uses @code{stdin} and @code{stderr}. +@code{getpass} also disables the INTR, QUIT, and SUSP characters on the +terminal using the @code{ISIG} terminal attribute (@pxref{Local Modes}). +The terminal is flushed before and after @code{getpass}, so that +characters of a mistyped password are not accidentally visible. + +In other C libraries, @code{getpass} may only return the first +@code{PASS_MAX} bytes of a password. @Theglibc{} has no limit, so +@code{PASS_MAX} is undefined. + +The prototype for this function is in @file{unistd.h}. @code{PASS_MAX} +would be defined in @file{limits.h}. +@end deftypefun + +This precise set of operations may not suit all possible situations. In +this case, it is recommended that users write their own @code{getpass} +substitute. For instance, a very simple substitute is as follows: + +@smallexample +@include mygetpass.c.texi +@end smallexample + +The substitute takes the same parameters as @code{getline} +(@pxref{Line Input}); the user must print any prompt desired. 
+ +@node crypt +@section Encrypting Passwords + +@comment crypt.h +@comment BSD, SVID +@deftypefun {char *} crypt (const char *@var{key}, const char *@var{salt}) +@safety{@prelim{}@mtunsafe{@mtasurace{:crypt}}@asunsafe{@asucorrupt{} @asulock{} @ascuheap{} @ascudlopen{}}@acunsafe{@aculock{} @acsmem{}}} +@c Besides the obvious problem of returning a pointer into static +@c storage, the DES initializer takes an internal lock with the usual +@c set of problems for AS- and AC-Safety. The FIPS mode checker and the +@c NSS implementations may leak file descriptors if canceled. The +@c MD5, SHA256 and SHA512 implementations will malloc on long keys, +@c and NSS relies on dlopening, which brings about another can of worms. + +The @code{crypt} function takes a password, @var{key}, as a string, and +a @var{salt} character array which is described below, and returns a +printable ASCII string which starts with another salt. It is believed +that, given the output of the function, the best way to find a @var{key} +that will produce that output is to guess values of @var{key} until the +original value of @var{key} is found. + +The @var{salt} parameter does two things. Firstly, it selects which +algorithm is used, the MD5-based one or the DES-based one. Secondly, it +makes life harder for someone trying to guess passwords against a file +containing many passwords; without a @var{salt}, an intruder can make a +guess, run @code{crypt} on it once, and compare the result with all the +passwords. With a @var{salt}, the intruder must run @code{crypt} once +for each different salt. + +For the MD5-based algorithm, the @var{salt} should consist of the string +@code{$1$}, followed by up to 8 characters, terminated by either +another @code{$} or the end of the string. The result of @code{crypt} +will be the @var{salt}, followed by a @code{$} if the salt didn't end +with one, followed by 22 characters from the alphabet +@code{./0-9A-Za-z}, up to 34 characters total.
Every character in the +@var{key} is significant. + +For the DES-based algorithm, the @var{salt} should consist of two +characters from the alphabet @code{./0-9A-Za-z}, and the result of +@code{crypt} will be those two characters followed by 11 more from the +same alphabet, 13 in total. Only the first 8 characters in the +@var{key} are significant. + +The MD5-based algorithm has no limit on the useful length of the +password used, and is slightly more secure. It is therefore preferred +over the DES-based algorithm. + +When the user enters their password for the first time, the @var{salt} +should be set to a new string which is reasonably random. To verify a +password against the result of a previous call to @code{crypt}, pass +the result of the previous call as the @var{salt}. +@end deftypefun + +The following short program is an example of how to use @code{crypt} the +first time a password is entered. Note that the @var{salt} generation +is just barely acceptable; in particular, it is not unique between +machines, and in many applications it would not be acceptable to let an +attacker know what time the user's password was last set. + +@smallexample +@include genpass.c.texi +@end smallexample + +The next program shows how to verify a password. It prompts the user +for a password and prints ``Access granted.'' if the user types +@code{GNU libc manual}. + +@smallexample +@include testpass.c.texi +@end smallexample + +@comment crypt.h +@comment GNU +@deftypefun {char *} crypt_r (const char *@var{key}, const char *@var{salt}, {struct crypt_data *} @var{data}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @asulock{} @ascuheap{} @ascudlopen{}}@acunsafe{@aculock{} @acsmem{}}} +@c Compared with crypt, this function fixes the @mtasurace:crypt +@c problem, but nothing else. + +The @code{crypt_r} function does the same thing as @code{crypt}, but +takes an extra parameter which includes space for its result (among +other things), so it can be reentrant. 
@code{data@w{->}initialized} must be +cleared to zero before the first time @code{crypt_r} is called. + +The @code{crypt_r} function is a GNU extension. +@end deftypefun + +The @code{crypt} and @code{crypt_r} functions are prototyped in the +header @file{crypt.h}. + +@node DES Encryption +@section DES Encryption + +@cindex FIPS 46-3 +The Data Encryption Standard is described in the US Government Federal +Information Processing Standards (FIPS) 46-3 published by the National +Institute of Standards and Technology. The DES has been very thoroughly +analyzed since it was developed in the late 1970s, and no new +significant flaws have been found. + +However, the DES uses only a 56-bit key (plus 8 parity bits), and a +machine has been built in 1998 which can search through all possible +keys in about 6 days, which cost about US$200000; faster searches would +be possible with more money. This makes simple DES insecure for most +purposes, and NIST no longer permits new US government systems +to use simple DES. + +For serious encryption functionality, it is recommended that one of the +many free encryption libraries be used instead of these routines. + +The DES is a reversible operation which takes a 64-bit block and a +64-bit key, and produces another 64-bit block. Usually the bits are +numbered so that the most-significant bit, the first bit, of each block +is numbered 1. + +Under that numbering, every 8th bit of the key (the 8th, 16th, and so +on) is not used by the encryption algorithm itself. But the key must +have odd parity; that is, out of bits 1 through 8, and 9 through 16, and +so on, there must be an odd number of `1' bits, and this completely +specifies the unused bits. + +@comment crypt.h +@comment BSD, SVID +@deftypefun void setkey (const char *@var{key}) +@safety{@prelim{}@mtunsafe{@mtasurace{:crypt}}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@aculock{}}} +@c The static buffer stores the key, making it fundamentally +@c thread-unsafe. 
The locking issues are only in the initialization +@c path; cancelling the initialization will leave the lock held, it +@c would otherwise repeat the initialization on the next call. + +The @code{setkey} function sets an internal data structure to be an +expanded form of @var{key}. @var{key} is specified as an array of 64 +bits each stored in a @code{char}, the first bit is @code{key[0]} and +the 64th bit is @code{key[63]}. The @var{key} should have the correct +parity. +@end deftypefun + +@comment crypt.h +@comment BSD, SVID +@deftypefun void encrypt (char *@var{block}, int @var{edflag}) +@safety{@prelim{}@mtunsafe{@mtasurace{:crypt}}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@aculock{}}} +@c Same issues as setkey. + +The @code{encrypt} function encrypts @var{block} if +@var{edflag} is 0, otherwise it decrypts @var{block}, using a key +previously set by @code{setkey}. The result is +placed in @var{block}. + +Like @code{setkey}, @var{block} is specified as an array of 64 bits each +stored in a @code{char}, but there are no parity bits in @var{block}. +@end deftypefun + +@comment crypt.h +@comment GNU +@deftypefun void setkey_r (const char *@var{key}, {struct crypt_data *} @var{data}) +@comment crypt.h +@comment GNU +@deftypefunx void encrypt_r (char *@var{block}, int @var{edflag}, {struct crypt_data *} @var{data}) +@c setkey_r: @safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@aculock{}}} +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@aculock{}}} + +These are reentrant versions of @code{setkey} and @code{encrypt}. The +only difference is the extra parameter, which stores the expanded +version of @var{key}. Before calling @code{setkey_r} the first time, +@code{data->initialized} must be cleared to zero. +@end deftypefun + +The @code{setkey_r} and @code{encrypt_r} functions are GNU extensions. +@code{setkey}, @code{encrypt}, @code{setkey_r}, and @code{encrypt_r} are +defined in @file{crypt.h}. 
+ +@comment rpc/des_crypt.h +@comment SUNRPC +@deftypefun int ecb_crypt (char *@var{key}, char *@var{blocks}, unsigned int @var{len}, unsigned int @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The function @code{ecb_crypt} encrypts or decrypts one or more blocks +using DES. Each block is encrypted independently. + +The @var{blocks} and the @var{key} are stored packed in 8-bit bytes, so +that the first bit of the key is the most-significant bit of +@code{key[0]} and the 64th bit of the key is stored as the +least-significant bit of @code{key[7]}. The @var{key} should have the +correct parity. + +@var{len} is the number of bytes in @var{blocks}. It should be a +multiple of 8 (so that there are a whole number of blocks to encrypt). +@var{len} is limited to a maximum of @code{DES_MAXDATA} bytes. + +The result of the encryption replaces the input in @var{blocks}. + +The @var{mode} parameter is the bitwise OR of two of the following: + +@vtable @code +@comment rpc/des_crypt.h +@comment SUNRPC +@item DES_ENCRYPT +This constant, used in the @var{mode} parameter, specifies that +@var{blocks} is to be encrypted. + +@comment rpc/des_crypt.h +@comment SUNRPC +@item DES_DECRYPT +This constant, used in the @var{mode} parameter, specifies that +@var{blocks} is to be decrypted. + +@comment rpc/des_crypt.h +@comment SUNRPC +@item DES_HW +This constant, used in the @var{mode} parameter, asks to use a hardware +device. If no hardware device is available, encryption happens anyway, +but in software. + +@comment rpc/des_crypt.h +@comment SUNRPC +@item DES_SW +This constant, used in the @var{mode} parameter, specifies that no +hardware device is to be used. +@end vtable + +The result of the function will be one of these values: + +@vtable @code +@comment rpc/des_crypt.h +@comment SUNRPC +@item DESERR_NONE +The encryption succeeded. + +@comment rpc/des_crypt.h +@comment SUNRPC +@item DESERR_NOHWDEVICE +The encryption succeeded, but there was no hardware device available. 
+ +@comment rpc/des_crypt.h +@comment SUNRPC +@item DESERR_HWERROR +The encryption failed because of a hardware problem. + +@comment rpc/des_crypt.h +@comment SUNRPC +@item DESERR_BADPARAM +The encryption failed because of a bad parameter, for instance @var{len} +is not a multiple of 8 or @var{len} is larger than @code{DES_MAXDATA}. +@end vtable +@end deftypefun + +@comment rpc/des_crypt.h +@comment SUNRPC +@deftypefun int DES_FAILED (int @var{err}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns 1 if @var{err} is a `failure' result code from +@code{ecb_crypt} or @code{cbc_crypt}, and 0 otherwise. +@end deftypefun + +@comment rpc/des_crypt.h +@comment SUNRPC +@deftypefun int cbc_crypt (char *@var{key}, char *@var{blocks}, unsigned int @var{len}, unsigned int @var{mode}, char *@var{ivec}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The function @code{cbc_crypt} encrypts or decrypts one or more blocks +using DES in Cipher Block Chaining mode. + +For encryption in CBC mode, each block is exclusive-ored with @var{ivec} +before being encrypted, then @var{ivec} is replaced with the result of +the encryption, then the next block is processed. Decryption is the +reverse of this process. + +This has the advantage that blocks which are the same before being +encrypted are very unlikely to be the same after being encrypted, making +it much harder to detect patterns in the data. + +Usually, @var{ivec} is set to 8 random bytes before encryption starts. +Then the 8 random bytes are transmitted along with the encrypted data +(without themselves being encrypted), and passed back in as @var{ivec} +for decryption. Another possibility is to set @var{ivec} to 8 zeroes +initially, and have the first block encrypted consist of 8 random +bytes. + +Otherwise, all the parameters are similar to those for @code{ecb_crypt}. 
+@end deftypefun + +@comment rpc/des_crypt.h +@comment SUNRPC +@deftypefun void des_setparity (char *@var{key}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The function @code{des_setparity} changes the 64-bit @var{key}, stored +packed in 8-bit bytes, to have odd parity by altering the low bits of +each byte. +@end deftypefun + +The @code{ecb_crypt}, @code{cbc_crypt}, and @code{des_setparity} +functions and their accompanying macros are all defined in the header +@file{rpc/des_crypt.h}. + +@node Unpredictable Bytes +@section Generating Unpredictable Bytes + +Some cryptographic applications (such as session key generation) need +unpredictable bytes. + +In general, application code should use a deterministic random bit +generator, which could call the @code{getentropy} function described +below internally to obtain randomness to seed the generator. The +@code{getrandom} function is intended for low-level applications which +need additional control over the blocking behavior. + +@comment sys/random.h +@comment GNU +@deftypefun int getentropy (void *@var{buffer}, size_t @var{length}) +@safety{@mtsafe{}@assafe{}@acsafe{}} + +This function writes @var{length} bytes of random data to the array +starting at @var{buffer}, which must be at most 256 bytes long. The +function returns zero on success. On failure, it returns @code{-1} and +@code{errno} is updated accordingly. + +The @code{getentropy} function is declared in the header file +@file{sys/random.h}. It is derived from OpenBSD. + +The @code{getentropy} function is not a cancellation point. A call to +@code{getentropy} can block if the system has just booted and the kernel +entropy pool has not yet been initialized. In this case, the function +will keep blocking even if a signal arrives, and return only after the +entropy pool has been initialized. + +The @code{getentropy} function can fail with several errors, some of +which are listed below. 
+ +@table @code +@item ENOSYS +The kernel does not implement the required system call. + +@item EFAULT +The combination of @var{buffer} and @var{length} arguments specifies +an invalid memory range. + +@item EIO +More than 256 bytes of randomness have been requested, or the buffer +could not be overwritten with random data for an unspecified reason. + +@end table + +@end deftypefun + +@comment sys/random.h +@comment GNU +@deftypefun ssize_t getrandom (void *@var{buffer}, size_t @var{length}, unsigned int @var{flags}) +@safety{@mtsafe{}@assafe{}@acsafe{}} + +This function writes @var{length} bytes of random data to the array +starting at @var{buffer}. On success, this function returns the number +of bytes which have been written to the buffer (which can be less than +@var{length}). On error, @code{-1} is returned, and @code{errno} is +updated accordingly. + +The @code{getrandom} function is declared in the header file +@file{sys/random.h}. It is a GNU extension. + +The following flags are defined for the @var{flags} argument: + +@table @code +@item GRND_RANDOM +Use the @file{/dev/random} (blocking) pool instead of the +@file{/dev/urandom} (non-blocking) pool to obtain randomness. If the +@code{GRND_RANDOM} flag is specified, the @code{getrandom} function can +block even after the randomness source has been initialized. + +@item GRND_NONBLOCK +Instead of blocking, return to the caller immediately if no data is +available. +@end table + +The @code{getrandom} function is a cancellation point. + +Obtaining randomness from the @file{/dev/urandom} pool (i.e., a call +without the @code{GRND_RANDOM} flag) can block if the system has just +booted and the pool has not yet been initialized. + +The @code{getrandom} function can fail with several errors, some of +which are listed below. In addition, the function may not fill the +buffer completely and return a value less than @var{length}. 
+ +@table @code +@item ENOSYS +The kernel does not implement the @code{getrandom} system call. + +@item EAGAIN +No random data was available and @code{GRND_NONBLOCK} was specified in +@var{flags}. + +@item EFAULT +The combination of @var{buffer} and @var{length} arguments specifies +an invalid memory range. + +@item EINTR +The system call was interrupted. During the system boot process, before +the kernel randomness pool is initialized, this can happen even if +@var{flags} is zero. + +@item EINVAL +The @var{flags} argument contains an invalid combination of flags. +@end table + +@end deftypefun diff --git a/REORG.TODO/manual/ctype.texi b/REORG.TODO/manual/ctype.texi new file mode 100644 index 0000000000..818c095d13 --- /dev/null +++ b/REORG.TODO/manual/ctype.texi @@ -0,0 +1,843 @@ +@node Character Handling, String and Array Utilities, Memory, Top +@c %MENU% Character testing and conversion functions +@chapter Character Handling + +Programs that work with characters and strings often need to classify a +character---is it alphabetic, is it a digit, is it whitespace, and so +on---and perform case conversion operations on characters. The +functions in the header file @file{ctype.h} are provided for this +purpose. +@pindex ctype.h + +Since the choice of locale and character set can alter the +classifications of particular character codes, all of these functions +are affected by the current locale. (More precisely, they are affected +by the locale currently selected for character classification---the +@code{LC_CTYPE} category; see @ref{Locale Categories}.) + +The @w{ISO C} standard specifies two different sets of functions. The +one set works on @code{char} type characters, the other one on +@code{wchar_t} wide characters (@pxref{Extended Char Intro}). + +@menu +* Classification of Characters:: Testing whether characters are + letters, digits, punctuation, etc. + +* Case Conversion:: Case mapping, and the like. 
+* Classification of Wide Characters:: Character class determination for + wide characters. +* Using Wide Char Classes:: Notes on using the wide character + classes. +* Wide Character Case Conversion:: Mapping of wide characters. +@end menu + +@node Classification of Characters, Case Conversion, , Character Handling +@section Classification of Characters +@cindex character testing +@cindex classification of characters +@cindex predicates on characters +@cindex character predicates + +This section explains the library functions for classifying characters. +For example, @code{isalpha} is the function to test for an alphabetic +character. It takes one argument, the character to test, and returns a +nonzero integer if the character is alphabetic, and zero otherwise. You +would use it like this: + +@smallexample +if (isalpha (c)) + printf ("The character `%c' is alphabetic.\n", c); +@end smallexample + +Each of the functions in this section tests for membership in a +particular class of characters; each has a name starting with @samp{is}. +Each of them takes one argument, which is a character to test, and +returns an @code{int} which is treated as a boolean value. The +character argument is passed as an @code{int}, and it may be the +constant value @code{EOF} instead of a real character. + +The attributes of any given character can vary between locales. +@xref{Locales}, for more information on locales.@refill + +These functions are declared in the header file @file{ctype.h}. +@pindex ctype.h + +@cindex lower-case character +@comment ctype.h +@comment ISO +@deftypefun int islower (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c The is* macros call __ctype_b_loc to get the ctype array from the +@c current locale, and then index it by c. 
__ctype_b_loc reads from +@c thread-local memory the (indirect) pointer to the ctype array, which +@c may involve one word access to the global locale object, if that's +@c the active locale for the thread, and the array, being part of the +@c locale data, is undeletable, so there's no thread-safety issue. We +@c might want to mark these with @mtslocale to flag to callers that +@c changing locales might affect them, even if not these simpler +@c functions. +Returns true if @var{c} is a lower-case letter. The letter need not be +from the Latin alphabet, any alphabet representable is valid. +@end deftypefun + +@cindex upper-case character +@comment ctype.h +@comment ISO +@deftypefun int isupper (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is an upper-case letter. The letter need not be +from the Latin alphabet, any alphabet representable is valid. +@end deftypefun + +@cindex alphabetic character +@comment ctype.h +@comment ISO +@deftypefun int isalpha (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is an alphabetic character (a letter). If +@code{islower} or @code{isupper} is true of a character, then +@code{isalpha} is also true. + +In some locales, there may be additional characters for which +@code{isalpha} is true---letters which are neither upper case nor lower +case. But in the standard @code{"C"} locale, there are no such +additional characters. +@end deftypefun + +@cindex digit character +@cindex decimal digit character +@comment ctype.h +@comment ISO +@deftypefun int isdigit (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a decimal digit (@samp{0} through @samp{9}). 
+@end deftypefun + +@cindex alphanumeric character +@comment ctype.h +@comment ISO +@deftypefun int isalnum (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is an alphanumeric character (a letter or +number); in other words, if either @code{isalpha} or @code{isdigit} is +true of a character, then @code{isalnum} is also true. +@end deftypefun + +@cindex hexadecimal digit character +@comment ctype.h +@comment ISO +@deftypefun int isxdigit (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a hexadecimal digit. +Hexadecimal digits include the normal decimal digits @samp{0} through +@samp{9} and the letters @samp{A} through @samp{F} and +@samp{a} through @samp{f}. +@end deftypefun + +@cindex punctuation character +@comment ctype.h +@comment ISO +@deftypefun int ispunct (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a punctuation character. +This means any printing character that is not alphanumeric or a space +character. +@end deftypefun + +@cindex whitespace character +@comment ctype.h +@comment ISO +@deftypefun int isspace (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a @dfn{whitespace} character. In the standard +@code{"C"} locale, @code{isspace} returns true for only the standard +whitespace characters: + +@table @code +@item ' ' +space + +@item '\f' +formfeed + +@item '\n' +newline + +@item '\r' +carriage return + +@item '\t' +horizontal tab + +@item '\v' +vertical tab +@end table +@end deftypefun + +@cindex blank character +@comment ctype.h +@comment ISO +@deftypefun int isblank (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a blank character; that is, a space or a tab. +This function was originally a GNU extension, but was added in @w{ISO C99}. 
+@end deftypefun + +@cindex graphic character +@comment ctype.h +@comment ISO +@deftypefun int isgraph (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a graphic character; that is, a character +that has a glyph associated with it. The whitespace characters are not +considered graphic. +@end deftypefun + +@cindex printing character +@comment ctype.h +@comment ISO +@deftypefun int isprint (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a printing character. Printing characters +include all the graphic characters, plus the space (@samp{ }) character. +@end deftypefun + +@cindex control character +@comment ctype.h +@comment ISO +@deftypefun int iscntrl (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a control character (that is, a character that +is not a printing character). +@end deftypefun + +@cindex ASCII character +@comment ctype.h +@comment SVID, BSD +@deftypefun int isascii (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns true if @var{c} is a 7-bit @code{unsigned char} value that fits +into the US/UK ASCII character set. This function is a BSD extension +and is also an SVID extension. +@end deftypefun + +@node Case Conversion, Classification of Wide Characters, Classification of Characters, Character Handling +@section Case Conversion +@cindex character case conversion +@cindex case conversion of characters +@cindex converting case of characters + +This section explains the library functions for performing conversions +such as case mappings on characters. For example, @code{toupper} +converts any character to upper case if possible. If the character +can't be converted, @code{toupper} returns it unchanged. + +These functions take one argument of type @code{int}, which is the +character to convert, and return the converted character as an +@code{int}. 
If the conversion is not applicable to the argument given, +the argument is returned unchanged. + +@strong{Compatibility Note:} In pre-@w{ISO C} dialects, instead of +returning the argument unchanged, these functions may fail when the +argument is not suitable for the conversion. Thus for portability, you +may need to write @code{islower(c) ? toupper(c) : c} rather than just +@code{toupper(c)}. + +These functions are declared in the header file @file{ctype.h}. +@pindex ctype.h + +@comment ctype.h +@comment ISO +@deftypefun int tolower (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c The to* macros/functions call different functions that use different +@c arrays than those of __ctype_b_loc, but the access patterns and +@c thus safety guarantees are the same. +If @var{c} is an upper-case letter, @code{tolower} returns the corresponding +lower-case letter. If @var{c} is not an upper-case letter, +@var{c} is returned unchanged. +@end deftypefun + +@comment ctype.h +@comment ISO +@deftypefun int toupper (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If @var{c} is a lower-case letter, @code{toupper} returns the corresponding +upper-case letter. Otherwise @var{c} is returned unchanged. +@end deftypefun + +@comment ctype.h +@comment SVID, BSD +@deftypefun int toascii (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function converts @var{c} to a 7-bit @code{unsigned char} value +that fits into the US/UK ASCII character set, by clearing the high-order +bits. This function is a BSD extension and is also an SVID extension. +@end deftypefun + +@comment ctype.h +@comment SVID +@deftypefun int _tolower (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is identical to @code{tolower}, and is provided for compatibility +with the SVID. 
@xref{SVID}.@refill +@end deftypefun + +@comment ctype.h +@comment SVID +@deftypefun int _toupper (int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is identical to @code{toupper}, and is provided for compatibility +with the SVID. +@end deftypefun + + +@node Classification of Wide Characters, Using Wide Char Classes, Case Conversion, Character Handling +@section Character class determination for wide characters + +@w{Amendment 1} to @w{ISO C90} defines functions to classify wide +characters. Although the original @w{ISO C90} standard already defined +the type @code{wchar_t}, no functions operating on it were defined. + +The design of the classification functions for wide characters +is more general. It allows extensions to the set of available +classifications, beyond those which are always available. The POSIX +standard specifies how extensions can be made, and this is already +implemented in the @glibcadj{} implementation of the @code{localedef} +program. + +The character class functions are normally implemented with bitsets, +with a bitset per character. For a given character, the appropriate +bitset is read from a table and a test is performed as to whether a +certain bit is set. Which bit is tested for is determined by the +class. + +For the wide character classification functions this is made visible. +There is a classification type defined, a function to retrieve this +value for a given class, and a function to test whether a given +character is in this class, using the classification value. On top of +this the normal character classification functions as used for +@code{char} objects can be defined. + +@comment wctype.h +@comment ISO +@deftp {Data type} wctype_t +The @code{wctype_t} can hold a value which represents a character class. +The only defined way to generate such a value is by using the +@code{wctype} function. + +@pindex wctype.h +This type is defined in @file{wctype.h}. 
+@end deftp + +@comment wctype.h +@comment ISO +@deftypefun wctype_t wctype (const char *@var{property}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Although the source code of wctype contains multiple references to +@c the locale, that could each reference different locale_data objects +@c should the global locale object change while active, the compiler can +@c and does combine them all into a single dereference that resolves +@c once to the LCTYPE locale object used throughout the function, so it +@c is safe in (optimized) practice, if not in theory, even when the +@c locale changes. Ideally we'd explicitly save the resolved +@c locale_data object to make it visibly safe instead of safe only under +@c compiler optimizations, but given the decision that setlocale is +@c MT-Unsafe, all this would afford us would be the ability to not mark +@c this function with @mtslocale. +@code{wctype} returns a value representing a class of wide +characters which is identified by the string @var{property}. Besides +some standard properties each locale can define its own ones. In case +no property with the given name is known for the current locale +selected for the @code{LC_CTYPE} category, the function returns zero. + +@noindent +The properties known in every locale are: + +@multitable @columnfractions .25 .25 .25 .25 +@item +@code{"alnum"} @tab @code{"alpha"} @tab @code{"cntrl"} @tab @code{"digit"} +@item +@code{"graph"} @tab @code{"lower"} @tab @code{"print"} @tab @code{"punct"} +@item +@code{"space"} @tab @code{"upper"} @tab @code{"xdigit"} +@end multitable + +@pindex wctype.h +This function is declared in @file{wctype.h}. +@end deftypefun + +To test the membership of a character to one of the non-standard classes +the @w{ISO C} standard defines a completely new function. 
+ +@comment wctype.h +@comment ISO +@deftypefun int iswctype (wint_t @var{wc}, wctype_t @var{desc}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c The compressed lookup table returned by wctype is read-only. +This function returns a nonzero value if @var{wc} is in the character +class specified by @var{desc}. @var{desc} must previously be returned +by a successful call to @code{wctype}. + +@pindex wctype.h +This function is declared in @file{wctype.h}. +@end deftypefun + +To make it easier to use the commonly-used classification functions, +they are defined in the C library. There is no need to use +@code{wctype} if the property string is one of the known character +classes. In some situations it is desirable to construct the property +strings, and then it is important that @code{wctype} can also handle the +standard classes. + +@cindex alphanumeric character +@comment wctype.h +@comment ISO +@deftypefun int iswalnum (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c The implicit wctype call in the isw* functions is actually an +@c optimized version because the category has a known offset, but the +@c wctype is equally safe when optimized, unsafe with changing locales +@c if not optimized (thus @mtslocale). Since it's not a macro, we +@c always optimize, and the locale can't change in any MT-Safe way, it's +@c fine. The test whether wc is ASCII to use the non-wide is* +@c macro/function doesn't bring any other safety issues: the test does +@c not depend on the locale, and each path after the decision resolves +@c the locale object only once. +This function returns a nonzero value if @var{wc} is an alphanumeric +character (a letter or number); in other words, if either @code{iswalpha} +or @code{iswdigit} is true of a character, then @code{iswalnum} is also +true. 
+ +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("alnum")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex alphabetic character +@comment wctype.h +@comment ISO +@deftypefun int iswalpha (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is an alphabetic character (a letter). If +@code{iswlower} or @code{iswupper} is true of a character, then +@code{iswalpha} is also true. + +In some locales, there may be additional characters for which +@code{iswalpha} is true---letters which are neither upper case nor lower +case. But in the standard @code{"C"} locale, there are no such +additional characters. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("alpha")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex control character +@comment wctype.h +@comment ISO +@deftypefun int iswcntrl (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a control character (that is, a character that +is not a printing character). + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("cntrl")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex digit character +@comment wctype.h +@comment ISO +@deftypefun int iswdigit (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a digit (e.g., @samp{0} through @samp{9}). +Please note that this function does not only return a nonzero value for +@emph{decimal} digits, but for all kinds of digits. 
A consequence is +that code like the following will @strong{not} work unconditionally for +wide characters: + +@smallexample +n = 0; +while (iswdigit (*wc)) + @{ + n *= 10; + n += *wc++ - L'0'; + @} +@end smallexample + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("digit")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex graphic character +@comment wctype.h +@comment ISO +@deftypefun int iswgraph (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a graphic character; that is, a character +that has a glyph associated with it. The whitespace characters are not +considered graphic. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("graph")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex lower-case character +@comment wctype.h +@comment ISO +@deftypefun int iswlower (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a lower-case letter. The letter need not be +from the Latin alphabet, any alphabet representable is valid. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("lower")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex printing character +@comment wctype.h +@comment ISO +@deftypefun int iswprint (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a printing character. Printing characters +include all the graphic characters, plus the space (@samp{ }) character. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("print")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. 
+@end deftypefun + +@cindex punctuation character +@comment wctype.h +@comment ISO +@deftypefun int iswpunct (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a punctuation character. +This means any printing character that is not alphanumeric or a space +character. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("punct")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex whitespace character +@comment wctype.h +@comment ISO +@deftypefun int iswspace (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a @dfn{whitespace} character. In the standard +@code{"C"} locale, @code{iswspace} returns true for only the standard +whitespace characters: + +@table @code +@item L' ' +space + +@item L'\f' +formfeed + +@item L'\n' +newline + +@item L'\r' +carriage return + +@item L'\t' +horizontal tab + +@item L'\v' +vertical tab +@end table + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("space")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex upper-case character +@comment wctype.h +@comment ISO +@deftypefun int iswupper (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is an upper-case letter. The letter need not be +from the Latin alphabet, any alphabet representable is valid. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("upper")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@cindex hexadecimal digit character +@comment wctype.h +@comment ISO +@deftypefun int iswxdigit (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a hexadecimal digit. 
+Hexadecimal digits include the normal decimal digits @samp{0} through +@samp{9} and the letters @samp{A} through @samp{F} and +@samp{a} through @samp{f}. + +@noindent +This function can be implemented using + +@smallexample +iswctype (wc, wctype ("xdigit")) +@end smallexample + +@pindex wctype.h +It is declared in @file{wctype.h}. +@end deftypefun + +@Theglibc{} also provides a function which is not defined in the +@w{ISO C} standard but which is available as a version for single byte +characters as well. + +@cindex blank character +@comment wctype.h +@comment ISO +@deftypefun int iswblank (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +Returns true if @var{wc} is a blank character; that is, a space or a tab. +This function was originally a GNU extension, but was added in @w{ISO C99}. +It is declared in @file{wctype.h}. +@end deftypefun + +@node Using Wide Char Classes, Wide Character Case Conversion, Classification of Wide Characters, Character Handling +@section Notes on using the wide character classes + +The first note is probably not astonishing but still occasionally a +cause of problems. The @code{isw@var{XXX}} functions can be implemented +using macros and in fact, @theglibc{} does this. They are still +available as real functions but when the @file{wctype.h} header is +included the macros will be used. This is the same as the +@code{char} type versions of these functions. + +The second note covers something new. It can be best illustrated by a +(real-world) example. The first piece of code is an excerpt from the +original code. It is truncated a bit but the intention should be clear.
+ +@smallexample +int +is_in_class (int c, const char *class) +@{ + if (strcmp (class, "alnum") == 0) + return isalnum (c); + if (strcmp (class, "alpha") == 0) + return isalpha (c); + if (strcmp (class, "cntrl") == 0) + return iscntrl (c); + @dots{} + return 0; +@} +@end smallexample + +Now, with the @code{wctype} and @code{iswctype} you can avoid the +@code{if} cascades, but rewriting the code as follows is wrong: + +@smallexample +int +is_in_class (int c, const char *class) +@{ + wctype_t desc = wctype (class); + return desc ? iswctype ((wint_t) c, desc) : 0; +@} +@end smallexample + +The problem is that it is not guaranteed that the wide character +representation of a single-byte character can be found using casting. +In fact, usually this fails miserably. The correct solution to this +problem is to write the code as follows: + +@smallexample +int +is_in_class (int c, const char *class) +@{ + wctype_t desc = wctype (class); + return desc ? iswctype (btowc (c), desc) : 0; +@} +@end smallexample + +@xref{Converting a Character}, for more information on @code{btowc}. +Note that this change probably does not improve the performance +of the program a lot since the @code{wctype} function still has to make +the string comparisons. It gets really interesting if the +@code{is_in_class} function is called more than once for the +same class name. In this case the variable @var{desc} could be computed +once and reused for all the calls. Therefore the above form of the +function is probably not the final one. + + +@node Wide Character Case Conversion, , Using Wide Char Classes, Character Handling +@section Mapping of wide characters + +Like the classification functions, the mapping functions are also +generalized by the @w{ISO C} standard. Instead of just allowing the two +standard mappings, a locale can contain others. Again, the +@code{localedef} program already supports generating such locale data files.
+ +@comment wctype.h +@comment ISO +@deftp {Data Type} wctrans_t +This data type is defined as a scalar type which can hold a value +representing the locale-dependent character mapping. There is no way to +construct such a value apart from using the return value of the +@code{wctrans} function. + +@pindex wctype.h +@noindent +This type is defined in @file{wctype.h}. +@end deftp + +@comment wctype.h +@comment ISO +@deftypefun wctrans_t wctrans (const char *@var{property}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Similar implementation, same caveats as wctype. +The @code{wctrans} function has to be used to find out whether a named +mapping is defined in the current locale selected for the +@code{LC_CTYPE} category. If the returned value is non-zero, you can use +it afterwards in calls to @code{towctrans}. If the return value is +zero, no such mapping is known in the current locale. + +Besides locale-specific mappings there are two mappings which are +guaranteed to be available in every locale: + +@multitable @columnfractions .5 .5 +@item +@code{"tolower"} @tab @code{"toupper"} +@end multitable + +@pindex wctype.h +@noindent +This function is declared in @file{wctype.h}. +@end deftypefun + +@comment wctype.h +@comment ISO +@deftypefun wint_t towctrans (wint_t @var{wc}, wctrans_t @var{desc}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Same caveats as iswctype. +@code{towctrans} maps the input character @var{wc} +according to the rules of the mapping for which @var{desc} is a +descriptor, and returns the value it finds. @var{desc} must be +obtained by a successful call to @code{wctrans}. + +@pindex wctype.h +@noindent +This function is declared in @file{wctype.h}. +@end deftypefun + +For the generally available mappings, the @w{ISO C} standard defines +convenient shortcuts so that it is not necessary to call @code{wctrans} +for them.
+ +@comment wctype.h +@comment ISO +@deftypefun wint_t towlower (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Same caveats as iswalnum, just using a wctrans rather than a wctype +@c table. +If @var{wc} is an upper-case letter, @code{towlower} returns the corresponding +lower-case letter. If @var{wc} is not an upper-case letter, +@var{wc} is returned unchanged. + +@noindent +@code{towlower} can be implemented using + +@smallexample +towctrans (wc, wctrans ("tolower")) +@end smallexample + +@pindex wctype.h +@noindent +This function is declared in @file{wctype.h}. +@end deftypefun + +@comment wctype.h +@comment ISO +@deftypefun wint_t towupper (wint_t @var{wc}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +If @var{wc} is a lower-case letter, @code{towupper} returns the corresponding +upper-case letter. Otherwise @var{wc} is returned unchanged. + +@noindent +@code{towupper} can be implemented using + +@smallexample +towctrans (wc, wctrans ("toupper")) +@end smallexample + +@pindex wctype.h +@noindent +This function is declared in @file{wctype.h}. +@end deftypefun + +The same warnings given in the last section for the use of the wide +character classification functions apply here. It is not possible to +simply cast a @code{char} type value to a @code{wint_t} and use it as an +argument to @code{towctrans} calls. diff --git a/REORG.TODO/manual/debug.texi b/REORG.TODO/manual/debug.texi new file mode 100644 index 0000000000..ac5121b061 --- /dev/null +++ b/REORG.TODO/manual/debug.texi @@ -0,0 +1,136 @@ +@node Debugging Support +@c @node Debugging Support, POSIX Threads, Cryptographic Functions, Top +@c %MENU% Functions to help debugging applications +@chapter Debugging support + +Applications are usually debugged using dedicated debugger programs. +But sometimes this is not possible and, in any case, it is useful to +provide the developer with as much information as possible at the time +the problems are experienced. 
For this reason a few functions are +provided which a program can use to help the developer more easily +locate the problem. + + +@menu +* Backtraces:: Obtaining and printing a back trace of the + current stack. +@end menu + + +@node Backtraces, , , Debugging Support +@section Backtraces + +@cindex backtrace +@cindex backtrace_symbols +@cindex backtrace_fd +A @dfn{backtrace} is a list of the function calls that are currently +active in a thread. The usual way to inspect a backtrace of a program +is to use an external debugger such as gdb. However, sometimes it is +useful to obtain a backtrace programmatically from within a program, +e.g., for the purposes of logging or diagnostics. + +The header file @file{execinfo.h} declares three functions that obtain +and manipulate backtraces of the current thread. +@pindex execinfo.h + +@comment execinfo.h +@comment GNU +@deftypefun int backtrace (void **@var{buffer}, int @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asuinit{} @ascuheap{} @ascudlopen{} @ascuplugin{} @asulock{}}@acunsafe{@acuinit{} @acsmem{} @aculock{} @acsfd{}}} +@c The generic implementation just does pointer chasing within the local +@c stack, without any guarantees that this will handle signal frames +@c correctly, so it's AS-Unsafe to begin with. However, most (all?) +@c arches defer to libgcc_s's _Unwind_* implementation, dlopening +@c libgcc_s.so to that end except in a static version of libc. +@c libgcc_s's implementation may in turn defer to libunwind. We can't +@c assume those implementations are AS- or AC-safe, but even if we +@c could, our own initialization path isn't, and libgcc's implementation +@c calls malloc and performs internal locking, so... +The @code{backtrace} function obtains a backtrace for the current +thread, as a list of pointers, and places the information into +@var{buffer}. The argument @var{size} should be the number of +@w{@code{void *}} elements that will fit into @var{buffer}. 
The return +value is the actual number of entries of @var{buffer} that are obtained, +and is at most @var{size}. + +The pointers placed in @var{buffer} are actually return addresses +obtained by inspecting the stack, one return address per stack frame. + +Note that certain compiler optimizations may interfere with obtaining a +valid backtrace. Function inlining causes the inlined function to not +have a stack frame; tail call optimization replaces one stack frame with +another; frame pointer elimination will stop @code{backtrace} from +interpreting the stack contents correctly. +@end deftypefun + +@comment execinfo.h +@comment GNU +@deftypefun {char **} backtrace_symbols (void *const *@var{buffer}, int @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @aculock{}}} +@c Collects info returned by _dl_addr in an auto array, allocates memory +@c for the whole return buffer with malloc then sprintfs into it storing +@c pointers to the strings into the array entries in the buffer. +@c _dl_addr takes the recursive dl_load_lock then calls +@c _dl_find_dso_for_object and determine_info. +@c _dl_find_dso_for_object calls _dl-addr_inside_object. +@c All of them are safe as long as the lock is held. +@c @asucorrupt? It doesn't look like the dynamic loader's data +@c structures could be in an inconsistent state that would cause +@c malfunction here. +The @code{backtrace_symbols} function translates the information +obtained from the @code{backtrace} function into an array of strings. +The argument @var{buffer} should be a pointer to an array of addresses +obtained via the @code{backtrace} function, and @var{size} is the number +of entries in that array (the return value of @code{backtrace}). + +The return value is a pointer to an array of strings, which has +@var{size} entries just like the array @var{buffer}. Each string +contains a printable representation of the corresponding element of +@var{buffer}. 
It includes the function name (if this can be +determined), an offset into the function, and the actual return address +(in hexadecimal). + +Currently, the function name and offset can only be obtained on systems that +use the ELF binary format for programs and libraries. On other systems, +only the hexadecimal return address will be present. Also, you may need +to pass additional flags to the linker to make the function names +available to the program. (For example, on systems using GNU ld, you +must pass @code{-rdynamic}.) + +The return value of @code{backtrace_symbols} is a pointer obtained via +the @code{malloc} function, and it is the responsibility of the caller +to @code{free} that pointer. Note that only the return value need be +freed, not the individual strings. + +The return value is @code{NULL} if sufficient memory for the strings +cannot be obtained. +@end deftypefun + +@comment execinfo.h +@comment GNU +@deftypefun void backtrace_symbols_fd (void *const *@var{buffer}, int @var{size}, int @var{fd}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +@c Single loop of _dl_addr over addresses, collecting info into an iovec +@c written out with a writev call per iteration. Addresses and offsets +@c are converted to hex in auto buffers, so the only potential issue +@c here is leaking the dl lock in case of cancellation. +The @code{backtrace_symbols_fd} function performs the same translation +as the @code{backtrace_symbols} function. Instead of returning +the strings to the caller, it writes the strings to the file descriptor +@var{fd}, one per line. It does not use the @code{malloc} function, and +can therefore be used in situations where that function might fail. +@end deftypefun + +The following program illustrates the use of these functions. Note that +the array to contain the return addresses returned by @code{backtrace} +is allocated on the stack.
Therefore code like this can be used in +situations where the memory handling via @code{malloc} does not work +anymore (in which case the @code{backtrace_symbols} has to be replaced +by a @code{backtrace_symbols_fd} call as well). The number of return +addresses is normally not very large. Even complicated programs rather +seldom have a nesting level of more than, say, 50 and with 200 possible +entries probably all programs should be covered. + +@smallexample +@include execinfo.c.texi +@end smallexample diff --git a/REORG.TODO/manual/dir b/REORG.TODO/manual/dir new file mode 100644 index 0000000000..ee98f79f59 --- /dev/null +++ b/REORG.TODO/manual/dir @@ -0,0 +1,15 @@ +This is the file .../info/dir, which contains the topmost node of the +Info hierarchy. The first time you invoke Info you start off +looking at that node, which is (dir)Top. + +File: dir Node: Top This is the top of the INFO tree + + This (the Directory node) gives a menu of major topics. + Typing "q" exits, "?" lists all Info commands, "d" returns here, + "h" gives a primer for first-timers, + "mEmacs<Return>" visits the Emacs topic, etc. + + In Emacs, you can click mouse button 2 on a menu item or cross reference + to select it. + +* Menu: diff --git a/REORG.TODO/manual/errno.texi b/REORG.TODO/manual/errno.texi new file mode 100644 index 0000000000..f4c07f0683 --- /dev/null +++ b/REORG.TODO/manual/errno.texi @@ -0,0 +1,1701 @@ +@node Error Reporting, Memory, Introduction, Top +@chapter Error Reporting +@c %MENU% How library functions report errors +@cindex error reporting +@cindex reporting errors +@cindex error codes +@cindex status codes + +Many functions in @theglibc{} detect and report error conditions, +and sometimes your programs need to check for these error conditions. +For example, when you open an input file, you should verify that the +file was actually opened correctly, and print an error message or take +other appropriate action if the call to the library function failed. 
+ +This chapter describes how the error reporting facility works. Your +program should include the header file @file{errno.h} to use this +facility. +@pindex errno.h + +@menu +* Checking for Errors:: How errors are reported by library functions. +* Error Codes:: Error code macros; all of these expand + into integer constant values. +* Error Messages:: Mapping error codes onto error messages. +@end menu + +@node Checking for Errors, Error Codes, , Error Reporting +@section Checking for Errors + +Most library functions return a special value to indicate that they have +failed. The special value is typically @code{-1}, a null pointer, or a +constant such as @code{EOF} that is defined for that purpose. But this +return value tells you only that an error has occurred. To find out +what kind of error it was, you need to look at the error code stored in the +variable @code{errno}. This variable is declared in the header file +@file{errno.h}. +@pindex errno.h + +@comment errno.h +@comment ISO +@deftypevr {Variable} {volatile int} errno +The variable @code{errno} contains the system error number. You can +change the value of @code{errno}. + +Since @code{errno} is declared @code{volatile}, it might be changed +asynchronously by a signal handler; see @ref{Defining Handlers}. +However, a properly written signal handler saves and restores the value +of @code{errno}, so you generally do not need to worry about this +possibility except when writing signal handlers. + +The initial value of @code{errno} at program startup is zero. Many +library functions are guaranteed to set it to certain nonzero values +when they encounter certain kinds of errors. These error conditions are +listed for each function. These functions do not change @code{errno} +when they succeed; thus, the value of @code{errno} after a successful +call is not necessarily zero, and you should not use @code{errno} to +determine @emph{whether} a call failed. The proper way to do that is +documented for each function. 
@emph{If} the call failed, you can +examine @code{errno}. + +Many library functions can set @code{errno} to a nonzero value as a +result of calling other library functions which might fail. You should +assume that any library function might alter @code{errno} when the +function returns an error. + +@strong{Portability Note:} @w{ISO C} specifies @code{errno} as a +``modifiable lvalue'' rather than as a variable, permitting it to be +implemented as a macro. For example, its expansion might involve a +function call, like @w{@code{*__errno_location ()}}. In fact, that is +what it is +on @gnulinuxhurdsystems{}. @Theglibc{}, on each system, does +whatever is right for the particular system. + +There are a few library functions, like @code{sqrt} and @code{atan}, +that return a perfectly legitimate value in case of an error, but also +set @code{errno}. For these functions, if you want to check to see +whether an error occurred, the recommended method is to set @code{errno} +to zero before calling the function, and then check its value afterward. +@end deftypevr + +@pindex errno.h +All the error codes have symbolic names; they are macros defined in +@file{errno.h}. The names start with @samp{E} and an upper-case +letter or digit; you should consider names of this form to be +reserved names. @xref{Reserved Names}. + +The error code values are all positive integers and are all distinct, +with one exception: @code{EWOULDBLOCK} and @code{EAGAIN} are the same. +Since the values are distinct, you can use them as labels in a +@code{switch} statement; just don't use both @code{EWOULDBLOCK} and +@code{EAGAIN}. Your program should not make any other assumptions about +the specific values of these symbolic constants. + +The value of @code{errno} doesn't necessarily have to correspond to any +of these macros, since some library functions might return other error +codes of their own for other situations. 
The only values that are +guaranteed to be meaningful for a particular library function are the +ones that this manual lists for that function. + +Except on @gnuhurdsystems{}, almost any system call can return @code{EFAULT} if +it is given an invalid pointer as an argument. Since this could only +happen as a result of a bug in your program, and since it will not +happen on @gnuhurdsystems{}, we have saved space by not mentioning +@code{EFAULT} in the descriptions of individual functions. + +In some Unix systems, many system calls can also return @code{EFAULT} if +given as an argument a pointer into the stack, and the kernel for some +obscure reason fails in its attempt to extend the stack. If this ever +happens, you should probably try using statically or dynamically +allocated memory instead of stack memory on that system. + +@node Error Codes, Error Messages, Checking for Errors, Error Reporting +@section Error Codes + +@pindex errno.h +The error code macros are defined in the header file @file{errno.h}. +All of them expand into integer constant values. Some of these error +codes can't occur on @gnusystems{}, but they can occur using @theglibc{} +on other systems. + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EPERM +@errno{EPERM, 1, Operation not permitted} +Only the owner of the file (or other resource) +or processes with special privileges can perform the operation. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOENT +@errno{ENOENT, 2, No such file or directory} +This is a ``file doesn't exist'' error +for ordinary files that are referenced in contexts where they are +expected to already exist. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ESRCH +@errno{ESRCH, 3, No such process} +No process matches the specified process ID. 
+@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EINTR +@errno{EINTR, 4, Interrupted system call} +An asynchronous signal occurred and prevented +completion of the call. When this happens, you should try the call +again. + +You can choose to have functions resume after a signal that is handled, +rather than failing with @code{EINTR}; see @ref{Interrupted +Primitives}. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EIO +@errno{EIO, 5, Input/output error} +Usually used for physical read or write errors. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENXIO +@errno{ENXIO, 6, No such device or address} +The system tried to use the device +represented by a file you specified, and it couldn't find the device. +This can mean that the device file was installed incorrectly, or that +the physical device is missing or not correctly attached to the +computer. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int E2BIG +@errno{E2BIG, 7, Argument list too long} +Used when the arguments passed to a new program +being executed with one of the @code{exec} functions (@pxref{Executing a +File}) occupy too much memory space. This condition never arises on +@gnuhurdsystems{}. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOEXEC +@errno{ENOEXEC, 8, Exec format error} +Invalid executable file format. This condition is detected by the +@code{exec} functions; see @ref{Executing a File}. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EBADF +@errno{EBADF, 9, Bad file descriptor} +For example, I/O on a descriptor that has been +closed or reading from a descriptor open only for writing (or vice +versa). 
+@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ECHILD +@errno{ECHILD, 10, No child processes} +This error happens on operations that are +supposed to manipulate child processes, when there aren't any processes +to manipulate. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EDEADLK +@errno{EDEADLK, 11, Resource deadlock avoided} +Allocating a system resource would have resulted in a +deadlock situation. The system does not guarantee that it will notice +all such situations. This error means you got lucky and the system +noticed; it might just hang. @xref{File Locks}, for an example. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOMEM +@errno{ENOMEM, 12, Cannot allocate memory} +The system cannot allocate more virtual memory +because its capacity is full. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EACCES +@errno{EACCES, 13, Permission denied} +The file permissions do not allow the attempted operation. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EFAULT +@errno{EFAULT, 14, Bad address} +An invalid pointer was detected. +On @gnuhurdsystems{}, this error never happens; you get a signal instead. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENOTBLK +@errno{ENOTBLK, 15, Block device required} +A file that isn't a block special file was given in a situation that +requires one. For example, trying to mount an ordinary file as a file +system in Unix gives this error. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EBUSY +@errno{EBUSY, 16, Device or resource busy} +A system resource that can't be shared is already in use. +For example, if you try to delete a file that is the root of a currently +mounted filesystem, you get this error. 
+@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EEXIST +@errno{EEXIST, 17, File exists} +An existing file was specified in a context where it only +makes sense to specify a new file. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EXDEV +@errno{EXDEV, 18, Invalid cross-device link} +An attempt to make an improper link across file systems was detected. +This happens not only when you use @code{link} (@pxref{Hard Links}) but +also when you rename a file with @code{rename} (@pxref{Renaming Files}). +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENODEV +@errno{ENODEV, 19, No such device} +The wrong type of device was given to a function that expects a +particular sort of device. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOTDIR +@errno{ENOTDIR, 20, Not a directory} +A file that isn't a directory was specified when a directory is required. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EISDIR +@errno{EISDIR, 21, Is a directory} +You cannot open a directory for writing, +or create or remove hard links to it. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EINVAL +@errno{EINVAL, 22, Invalid argument} +This is used to indicate various kinds of problems +with passing the wrong argument to a library function. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EMFILE +@errno{EMFILE, 24, Too many open files} +The current process has too many files open and can't open any more. +Duplicate descriptors do count toward this limit. + +In BSD and GNU, the number of open files is controlled by a resource +limit that can usually be increased. If you get this error, you might +want to increase the @code{RLIMIT_NOFILE} limit or make it unlimited; +@pxref{Limits on Resources}. 
+@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENFILE +@errno{ENFILE, 23, Too many open files in system} +There are too many distinct file openings in the entire system. Note +that any number of linked channels count as just one file opening; see +@ref{Linked Channels}. This error never occurs on @gnuhurdsystems{}. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOTTY +@errno{ENOTTY, 25, Inappropriate ioctl for device} +Inappropriate I/O control operation, such as trying to set terminal +modes on an ordinary file. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ETXTBSY +@errno{ETXTBSY, 26, Text file busy} +An attempt to execute a file that is currently open for writing, or +write to a file that is currently being executed. Often using a +debugger to run a program is considered having it open for writing and +will cause this error. (The name stands for ``text file busy''.) This +is not an error on @gnuhurdsystems{}; the text is copied as necessary. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EFBIG +@errno{EFBIG, 27, File too large} +The size of a file would be larger than allowed by the system. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOSPC +@errno{ENOSPC, 28, No space left on device} +Write operation on a file failed because the +disk is full. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ESPIPE +@errno{ESPIPE, 29, Illegal seek} +Invalid seek operation (such as on a pipe). +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EROFS +@errno{EROFS, 30, Read-only file system} +An attempt was made to modify something on a read-only file system. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EMLINK +@errno{EMLINK, 31, Too many links} +The link count of a single file would become too large. 
+@code{rename} can cause this error if the file being renamed already has +as many links as it can take (@pxref{Renaming Files}). +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EPIPE +@errno{EPIPE, 32, Broken pipe} +There is no process reading from the other end of a pipe. +Every library function that returns this error code also generates a +@code{SIGPIPE} signal; this signal terminates the program if not handled +or blocked. Thus, your program will never actually see @code{EPIPE} +unless it has handled or blocked @code{SIGPIPE}. +@end deftypevr + +@comment errno.h +@comment ISO +@deftypevr Macro int EDOM +@errno{EDOM, 33, Numerical argument out of domain} +Used by mathematical functions when an argument value does +not fall into the domain over which the function is defined. +@end deftypevr + +@comment errno.h +@comment ISO +@deftypevr Macro int ERANGE +@errno{ERANGE, 34, Numerical result out of range} +Used by mathematical functions when the result value is +not representable because of overflow or underflow. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int EAGAIN +@errno{EAGAIN, 35, Resource temporarily unavailable} +The call might work if you try again +later. The macro @code{EWOULDBLOCK} is another name for @code{EAGAIN}; +they are always the same in @theglibc{}. + +This error can happen in a few different situations: + +@itemize @bullet +@item +An operation that would block was attempted on an object that has +non-blocking mode selected. Trying the same operation again will block +until some external condition makes it possible to read, write, or +connect (whatever the operation). You can use @code{select} to find out +when the operation will be possible; @pxref{Waiting for I/O}. + +@strong{Portability Note:} In many older Unix systems, this condition +was indicated by @code{EWOULDBLOCK}, which was a distinct error code +different from @code{EAGAIN}. 
To make your program portable, you should +check for both codes and treat them the same. + +@item +A temporary resource shortage made an operation impossible. @code{fork} +can return this error. It indicates that the shortage is expected to +pass, so your program can try the call again later and it may succeed. +It is probably a good idea to delay for a few seconds before trying it +again, to allow time for other processes to release scarce resources. +Such shortages are usually fairly serious and affect the whole system, +so usually an interactive program should report the error to the user +and return to its command loop. +@end itemize +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EWOULDBLOCK +@errno{EWOULDBLOCK, EAGAIN, Operation would block} +In @theglibc{}, this is another name for @code{EAGAIN} (above). +The values are always the same, on every operating system. + +C libraries in many older Unix systems have @code{EWOULDBLOCK} as a +separate error code. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EINPROGRESS +@errno{EINPROGRESS, 36, Operation now in progress} +An operation that cannot complete immediately was initiated on an object +that has non-blocking mode selected. Some functions that must always +block (such as @code{connect}; @pxref{Connecting}) never return +@code{EAGAIN}. Instead, they return @code{EINPROGRESS} to indicate that +the operation has begun and will take some time. Attempts to manipulate +the object before the call completes return @code{EALREADY}. You can +use the @code{select} function to find out when the pending operation +has completed; @pxref{Waiting for I/O}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EALREADY +@errno{EALREADY, 37, Operation already in progress} +An operation is already in progress on an object that has non-blocking +mode selected. 
+@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENOTSOCK +@errno{ENOTSOCK, 38, Socket operation on non-socket} +A file that isn't a socket was specified when a socket is required. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EMSGSIZE +@errno{EMSGSIZE, 40, Message too long} +The size of a message sent on a socket was larger than the supported +maximum size. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPROTOTYPE +@errno{EPROTOTYPE, 41, Protocol wrong type for socket} +The socket type does not support the requested communications protocol. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENOPROTOOPT +@errno{ENOPROTOOPT, 42, Protocol not available} +You specified a socket option that doesn't make sense for the +particular protocol being used by the socket. @xref{Socket Options}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPROTONOSUPPORT +@errno{EPROTONOSUPPORT, 43, Protocol not supported} +The socket domain does not support the requested communications protocol +(perhaps because the requested protocol is completely invalid). +@xref{Creating a Socket}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ESOCKTNOSUPPORT +@errno{ESOCKTNOSUPPORT, 44, Socket type not supported} +The socket type is not supported. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EOPNOTSUPP +@errno{EOPNOTSUPP, 45, Operation not supported} +The operation you requested is not supported. Some socket functions +don't make sense for all types of sockets, and others may not be +implemented for all communications protocols. On @gnuhurdsystems{}, this +error can happen for many calls when the object does not support the +particular operation; it is a generic indication that the server knows +nothing to do for that call. 
+@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPFNOSUPPORT +@errno{EPFNOSUPPORT, 46, Protocol family not supported} +The socket communications protocol family you requested is not supported. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EAFNOSUPPORT +@errno{EAFNOSUPPORT, 47, Address family not supported by protocol} +The address family specified for a socket is not supported; it is +inconsistent with the protocol being used on the socket. @xref{Sockets}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EADDRINUSE +@errno{EADDRINUSE, 48, Address already in use} +The requested socket address is already in use. @xref{Socket Addresses}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EADDRNOTAVAIL +@errno{EADDRNOTAVAIL, 49, Cannot assign requested address} +The requested socket address is not available; for example, you tried +to give a socket a name that doesn't match the local host name. +@xref{Socket Addresses}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENETDOWN +@errno{ENETDOWN, 50, Network is down} +A socket operation failed because the network was down. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENETUNREACH +@errno{ENETUNREACH, 51, Network is unreachable} +A socket operation failed because the subnet containing the remote host +was unreachable. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENETRESET +@errno{ENETRESET, 52, Network dropped connection on reset} +A network connection was reset because the remote host crashed. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ECONNABORTED +@errno{ECONNABORTED, 53, Software caused connection abort} +A network connection was aborted locally. 
+@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ECONNRESET +@errno{ECONNRESET, 54, Connection reset by peer} +A network connection was closed for reasons outside the control of the +local host, such as by the remote machine rebooting or an unrecoverable +protocol violation. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENOBUFS +@errno{ENOBUFS, 55, No buffer space available} +The kernel's buffers for I/O operations are all in use. In GNU, this +error is always synonymous with @code{ENOMEM}; you may get one or the +other from network operations. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EISCONN +@errno{EISCONN, 56, Transport endpoint is already connected} +You tried to connect a socket that is already connected. +@xref{Connecting}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENOTCONN +@errno{ENOTCONN, 57, Transport endpoint is not connected} +The socket is not connected to anything. You get this error when you +try to transmit data over a socket, without first specifying a +destination for the data. For a connectionless socket (for datagram +protocols, such as UDP), you get @code{EDESTADDRREQ} instead. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EDESTADDRREQ +@errno{EDESTADDRREQ, 39, Destination address required} +No default destination address was set for the socket. You get this +error when you try to transmit data over a connectionless socket, +without first specifying a destination for the data with @code{connect}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ESHUTDOWN +@errno{ESHUTDOWN, 58, Cannot send after transport endpoint shutdown} +The socket has already been shut down. 
+@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ETOOMANYREFS +@errno{ETOOMANYREFS, 59, Too many references: cannot splice} +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ETIMEDOUT +@errno{ETIMEDOUT, 60, Connection timed out} +A socket operation with a specified timeout received no response during +the timeout period. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ECONNREFUSED +@errno{ECONNREFUSED, 61, Connection refused} +A remote host refused to allow the network connection (typically because +it is not running the requested service). +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ELOOP +@errno{ELOOP, 62, Too many levels of symbolic links} +Too many levels of symbolic links were encountered in looking up a file name. +This often indicates a cycle of symbolic links. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENAMETOOLONG +@errno{ENAMETOOLONG, 63, File name too long} +Filename too long (longer than @code{PATH_MAX}; @pxref{Limits for +Files}) or host name too long (in @code{gethostname} or +@code{sethostname}; @pxref{Host Identification}). +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EHOSTDOWN +@errno{EHOSTDOWN, 64, Host is down} +The remote host for a requested network connection is down. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EHOSTUNREACH +@errno{EHOSTUNREACH, 65, No route to host} +The remote host for a requested network connection is not reachable. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOTEMPTY +@errno{ENOTEMPTY, 66, Directory not empty} +Directory not empty, where an empty directory was expected. Typically, +this error occurs when you are trying to delete a directory. 
+@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPROCLIM +@errno{EPROCLIM, 67, Too many processes} +This means that the per-user limit on new processes would be exceeded by +an attempted @code{fork}. @xref{Limits on Resources}, for details on +the @code{RLIMIT_NPROC} limit. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EUSERS +@errno{EUSERS, 68, Too many users} +The file quota system is confused because there are too many users. +@c This can probably happen in a GNU system when using NFS. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EDQUOT +@errno{EDQUOT, 69, Disk quota exceeded} +The user's disk quota was exceeded. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ESTALE +@errno{ESTALE, 70, Stale file handle} +This indicates an internal confusion in the +file system which is due to file system rearrangements on the server host +for NFS file systems or corruption in other file systems. +Repairing this condition usually requires unmounting, possibly repairing +and remounting the file system. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EREMOTE +@errno{EREMOTE, 71, Object is remote} +An attempt was made to NFS-mount a remote file system with a file name that +already specifies an NFS-mounted file. +(This is an error on some operating systems, but we expect it to work +properly on @gnuhurdsystems{}, making this error code impossible.) 
+@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EBADRPC +@errno{EBADRPC, 72, RPC struct is bad} +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ERPCMISMATCH +@errno{ERPCMISMATCH, 73, RPC version wrong} +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPROGUNAVAIL +@errno{EPROGUNAVAIL, 74, RPC program not available} +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPROGMISMATCH +@errno{EPROGMISMATCH, 75, RPC program version wrong} +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EPROCUNAVAIL +@errno{EPROCUNAVAIL, 76, RPC bad procedure for program} +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOLCK +@errno{ENOLCK, 77, No locks available} +This is used by the file locking facilities; see +@ref{File Locks}. This error is never generated by @gnuhurdsystems{}, but +it can result from an operation to an NFS server running another +operating system. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EFTYPE +@errno{EFTYPE, 79, Inappropriate file type or format} +The file was the wrong type for the +operation, or a data file had the wrong format. + +On some systems @code{chmod} returns this error if you try to set the +sticky bit on a non-directory file; @pxref{Setting Permissions}. +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int EAUTH +@errno{EAUTH, 80, Authentication error} +@end deftypevr + +@comment errno.h +@comment BSD +@deftypevr Macro int ENEEDAUTH +@errno{ENEEDAUTH, 81, Need authenticator} +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOSYS +@errno{ENOSYS, 78, Function not implemented} +This indicates that the function called is +not implemented at all, either in the C library itself or in the +operating system. 
When you get this error, you can be sure that this +particular function will always fail with @code{ENOSYS} unless you +install a new version of the C library or the operating system. +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ENOTSUP +@errno{ENOTSUP, 118, Not supported} +A function returns this error when certain parameter +values are valid, but the functionality they request is not available. +This can mean that the function does not implement a particular command +or option value or flag bit at all. For functions that operate on some +object given in a parameter, such as a file descriptor or a port, it +might instead mean that only @emph{that specific object} (file +descriptor, port, etc.) is unable to support the other parameters given; +different file descriptors might support different ranges of parameter +values. + +If the entire function is not available at all in the implementation, +it returns @code{ENOSYS} instead. +@end deftypevr + +@comment errno.h +@comment ISO +@deftypevr Macro int EILSEQ +@errno{EILSEQ, 106, Invalid or incomplete multibyte or wide character} +While decoding a multibyte character the function came across an invalid +or an incomplete sequence of bytes or the given wide character is invalid. +@end deftypevr + +@comment errno.h +@comment GNU +@deftypevr Macro int EBACKGROUND +@errno{EBACKGROUND, 100, Inappropriate operation for background process} +On @gnuhurdsystems{}, servers supporting the @code{term} protocol return +this error for certain operations when the caller is not in the +foreground process group of the terminal. Users do not usually see this +error because functions such as @code{read} and @code{write} translate +it into a @code{SIGTTIN} or @code{SIGTTOU} signal. @xref{Job Control}, +for information on process groups and these signals. 
+@end deftypevr + +@comment errno.h +@comment GNU +@deftypevr Macro int EDIED +@errno{EDIED, 101, Translator died} +On @gnuhurdsystems{}, opening a file returns this error when the file is +translated by a program and the translator program dies while starting +up, before it has connected to the file. +@end deftypevr + +@comment errno.h +@comment GNU +@deftypevr Macro int ED +@errno{ED, 102, ?} +The experienced user will know what is wrong. +@c This error code is a joke. Its perror text is part of the joke. +@c Don't change it. +@end deftypevr + +@comment errno.h +@comment GNU +@deftypevr Macro int EGREGIOUS +@errno{EGREGIOUS, 103, You really blew it this time} +You did @strong{what}? +@end deftypevr + +@comment errno.h +@comment GNU +@deftypevr Macro int EIEIO +@errno{EIEIO, 104, Computer bought the farm} +Go home and have a glass of warm, dairy-fresh milk. +@end deftypevr + +@comment errno.h +@comment GNU +@deftypevr Macro int EGRATUITOUS +@errno{EGRATUITOUS, 105, Gratuitous error} +This error code has no purpose. 
+@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int EBADMSG +@errno{EBADMSG, 107, Bad message} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int EIDRM +@errno{EIDRM, 108, Identifier removed} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int EMULTIHOP +@errno{EMULTIHOP, 109, Multihop attempted} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int ENODATA +@errno{ENODATA, 110, No data available} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int ENOLINK +@errno{ENOLINK, 111, Link has been severed} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int ENOMSG +@errno{ENOMSG, 112, No message of desired type} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int ENOSR +@errno{ENOSR, 113, Out of streams resources} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int ENOSTR +@errno{ENOSTR, 114, Device not a stream} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int EOVERFLOW +@errno{EOVERFLOW, 115, Value too large for defined data type} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int EPROTO +@errno{EPROTO, 116, Protocol error} +@end deftypevr + +@comment errno.h +@comment XOPEN +@deftypevr Macro int ETIME +@errno{ETIME, 117, Timer expired} +@end deftypevr + +@comment errno.h +@comment POSIX.1 +@deftypevr Macro int ECANCELED +@errno{ECANCELED, 119, Operation canceled} +An asynchronous operation was canceled before it +completed. @xref{Asynchronous I/O}. When you call @code{aio_cancel}, +the normal result is for the operations affected to complete with this +error; @pxref{Cancel AIO Operations}. +@end deftypevr + + +@emph{The following error codes are defined by the Linux/i386 kernel. +They are not yet documented.} + +@comment errno.h +@comment Linux??? 
+@deftypevr Macro int ERESTART +@errno{ERESTART, ???/85, Interrupted system call should be restarted} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ECHRNG +@errno{ECHRNG, ???/44, Channel number out of range} +@end deftypevr + +@comment errno.h +@comment Obsolete +@deftypevr Macro int EL2NSYNC +@errno{EL2NSYNC, ???/45, Level 2 not synchronized} +@end deftypevr + +@comment errno.h +@comment Obsolete +@deftypevr Macro int EL3HLT +@errno{EL3HLT, ???/46, Level 3 halted} +@end deftypevr + +@comment errno.h +@comment Obsolete +@deftypevr Macro int EL3RST +@errno{EL3RST, ???/47, Level 3 reset} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ELNRNG +@errno{ELNRNG, ???/48, Link number out of range} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EUNATCH +@errno{EUNATCH, ???/49, Protocol driver not attached} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENOCSI +@errno{ENOCSI, ???/50, No CSI structure available} +@end deftypevr + +@comment errno.h +@comment Obsolete +@deftypevr Macro int EL2HLT +@errno{EL2HLT, ???/51, Level 2 halted} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EBADE +@errno{EBADE, ???/52, Invalid exchange} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EBADR +@errno{EBADR, ???/53, Invalid request descriptor} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EXFULL +@errno{EXFULL, ???/54, Exchange full} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENOANO +@errno{ENOANO, ???/55, No anode} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EBADRQC +@errno{EBADRQC, ???/56, Invalid request code} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EBADSLT +@errno{EBADSLT, ???/57, Invalid slot} +@end deftypevr + +@comment errno.h +@comment Linux??? 
+@deftypevr Macro int EDEADLOCK +@errno{EDEADLOCK, ???/58, File locking deadlock error} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EBFONT +@errno{EBFONT, ???/59, Bad font file format} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENONET +@errno{ENONET, ???/64, Machine is not on the network} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENOPKG +@errno{ENOPKG, ???/65, Package not installed} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EADV +@errno{EADV, ???/68, Advertise error} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ESRMNT +@errno{ESRMNT, ???/69, Srmount error} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ECOMM +@errno{ECOMM, ???/70, Communication error on send} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EDOTDOT +@errno{EDOTDOT, ???/73, RFS specific error} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENOTUNIQ +@errno{ENOTUNIQ, ???/76, Name not unique on network} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EBADFD +@errno{EBADFD, ???/77, File descriptor in bad state} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EREMCHG +@errno{EREMCHG, ???/78, Remote address changed} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ELIBACC +@errno{ELIBACC, ???/79, Can not access a needed shared library} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ELIBBAD +@errno{ELIBBAD, ???/80, Accessing a corrupted shared library} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ELIBSCN +@errno{ELIBSCN, ???/81, .lib section in a.out corrupted} +@end deftypevr + +@comment errno.h +@comment Linux??? 
+@deftypevr Macro int ELIBMAX +@errno{ELIBMAX, ???/82, Attempting to link in too many shared libraries} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ELIBEXEC +@errno{ELIBEXEC, ???/83, Cannot exec a shared library directly} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ESTRPIPE +@errno{ESTRPIPE, ???/86, Streams pipe error} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EUCLEAN +@errno{EUCLEAN, ???/117, Structure needs cleaning} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENOTNAM +@errno{ENOTNAM, ???/118, Not a XENIX named type file} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENAVAIL +@errno{ENAVAIL, ???/119, No XENIX semaphores available} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EISNAM +@errno{EISNAM, ???/120, Is a named type file} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int EREMOTEIO +@errno{EREMOTEIO, ???/121, Remote I/O error} +@end deftypevr + +@comment errno.h +@comment Linux??? +@deftypevr Macro int ENOMEDIUM +@errno{ENOMEDIUM, ???/???, No medium found} +@end deftypevr + +@comment errno.h +@comment Linux??? 
+@deftypevr Macro int EMEDIUMTYPE +@errno{EMEDIUMTYPE, ???/???, Wrong medium type} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int ENOKEY +@errno{ENOKEY, ???/???, Required key not available} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int EKEYEXPIRED +@errno{EKEYEXPIRED, ???/???, Key has expired} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int EKEYREVOKED +@errno{EKEYREVOKED, ???/???, Key has been revoked} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int EKEYREJECTED +@errno{EKEYREJECTED, ???/???, Key was rejected by service} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int EOWNERDEAD +@errno{EOWNERDEAD, ???/???, Owner died} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int ENOTRECOVERABLE +@errno{ENOTRECOVERABLE, ???/???, State not recoverable} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int ERFKILL +@errno{ERFKILL, ???/???, Operation not possible due to RF-kill} +@end deftypevr + +@comment errno.h +@comment Linux +@deftypevr Macro int EHWPOISON +@errno{EHWPOISON, ???/???, Memory page has hardware error} +@end deftypevr + +@node Error Messages, , Error Codes, Error Reporting +@section Error Messages + +The library has functions and variables designed to make it easy for +your program to report informative error messages in the customary +format about the failure of a library call. The functions +@code{strerror} and @code{perror} give you the standard error message +for a given error code; the variable +@w{@code{program_invocation_short_name}} gives you convenient access to the +name of the program that encountered the error. 
+ +@comment string.h +@comment ISO +@deftypefun {char *} strerror (int @var{errnum}) +@safety{@prelim{}@mtunsafe{@mtasurace{:strerror}}@asunsafe{@ascuheap{} @ascuintl{}}@acunsafe{@acsmem{}}} +@c Calls strerror_r with a static buffer allocated with malloc on the +@c first use. +The @code{strerror} function maps the error code (@pxref{Checking for +Errors}) specified by the @var{errnum} argument to a descriptive error +message string. The return value is a pointer to this string. + +The value @var{errnum} normally comes from the variable @code{errno}. + +You should not modify the string returned by @code{strerror}. Also, if +you make subsequent calls to @code{strerror}, the string might be +overwritten. (But it's guaranteed that no library function ever calls +@code{strerror} behind your back.) + +The function @code{strerror} is declared in @file{string.h}. +@end deftypefun + +@comment string.h +@comment GNU +@deftypefun {char *} strerror_r (int @var{errnum}, char *@var{buf}, size_t @var{n}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuintl{}}@acunsafe{}} +The @code{strerror_r} function works like @code{strerror} but instead of +returning the error message in a statically allocated buffer shared by +all threads in the process, it returns a private copy for the +thread. This might be either some permanent global data or a message +string in the user supplied buffer starting at @var{buf} with the +length of @var{n} bytes. + +At most @var{n} characters are written (including the NUL byte) so it is +up to the user to select a buffer large enough. + +This function should always be used in multi-threaded programs since +there is no way to guarantee the string returned by @code{strerror} +really belongs to the last call of the current thread. + +The function @code{strerror_r} is a GNU extension and it is declared in +@file{string.h}. 
+@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun void perror (const char *@var{message}) +@safety{@prelim{}@mtsafe{@mtasurace{:stderr}}@asunsafe{@asucorrupt{} @ascuintl{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c Besides strerror_r's and some of fprintf's issues, if stderr is not +@c oriented yet, create a new stream with a dup of stderr's fd and write +@c to that instead of stderr, to avoid orienting it. +This function prints an error message to the stream @code{stderr}; +see @ref{Standard Streams}. The orientation of @code{stderr} is not +changed. + +If you call @code{perror} with a @var{message} that is either a null +pointer or an empty string, @code{perror} just prints the error message +corresponding to @code{errno}, adding a trailing newline. + +If you supply a non-null @var{message} argument, then @code{perror} +prefixes its output with this string. It adds a colon and a space +character to separate the @var{message} from the error string corresponding +to @code{errno}. + +The function @code{perror} is declared in @file{stdio.h}. +@end deftypefun + +@code{strerror} and @code{perror} produce the exact same message for any +given error code; the precise text varies from system to system. With +@theglibc{}, the messages are fairly short; there are no multi-line +messages or embedded newlines. Each error message begins with a capital +letter and does not include any terminating punctuation. + +@cindex program name +@cindex name of running program +Many programs that don't read input from the terminal are designed to +exit if any system call fails. By convention, the error message from +such a program should start with the program's name, sans directories. +You can find that name in the variable +@code{program_invocation_short_name}; the full file name is stored in the +variable @code{program_invocation_name}. 
+ +@comment errno.h +@comment GNU +@deftypevar {char *} program_invocation_name +This variable's value is the name that was used to invoke the program +running in the current process. It is the same as @code{argv[0]}. Note +that this is not necessarily a useful file name; often it contains no +directory names. @xref{Program Arguments}. + +This variable is a GNU extension and is declared in @file{errno.h}. +@end deftypevar + +@comment errno.h +@comment GNU +@deftypevar {char *} program_invocation_short_name +This variable's value is the name that was used to invoke the program +running in the current process, with directory names removed. (That is +to say, it is the same as @code{program_invocation_name} minus +everything up to the last slash, if any.) + +This variable is a GNU extension and is declared in @file{errno.h}. +@end deftypevar + +The library initialization code sets up both of these variables before +calling @code{main}. + +@strong{Portability Note:} If you want your program to work with +non-GNU libraries, you must save the value of @code{argv[0]} in +@code{main}, and then strip off the directory names yourself. We +added these extensions to make it possible to write self-contained +error-reporting subroutines that require no explicit cooperation from +@code{main}. + +Here is an example showing how to handle failure to open a file +correctly. The function @code{open_sesame} tries to open the named file +for reading and returns a stream if successful. The @code{fopen} +library function returns a null pointer if it couldn't open the file for +some reason. In that situation, @code{open_sesame} constructs an +appropriate error message using the @code{strerror} function, and +terminates the program. If we were going to make some other library +calls before passing the error code to @code{strerror}, we'd have to +save it in a local variable instead, because those other library +functions might overwrite @code{errno} in the meantime. 
+ +@smallexample +#define _GNU_SOURCE + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +FILE * +open_sesame (char *name) +@{ + FILE *stream; + + errno = 0; + stream = fopen (name, "r"); + if (stream == NULL) + @{ + fprintf (stderr, "%s: Couldn't open file %s; %s\n", + program_invocation_short_name, name, strerror (errno)); + exit (EXIT_FAILURE); + @} + else + return stream; +@} +@end smallexample + +Using @code{perror} has the advantage that the function is portable and +available on all systems implementing @w{ISO C}. But often the text +@code{perror} generates is not what is wanted and there is no way to +extend or change what @code{perror} does. The GNU coding standard, for +instance, requires error messages to be preceded by the program name and +programs which read some input files should provide information +about the input file name and the line number in case an error is +encountered while reading the file. For these occasions there are two +functions available which are widely used throughout the GNU project. +These functions are declared in @file{error.h}. + +@comment error.h +@comment GNU +@deftypefun void error (int @var{status}, int @var{errnum}, const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @asuheap{} @asuintl{}}@acsafe{}} +@c Cancellation is disabled throughout the execution. It flushes stdout +@c and then holds a lock on stderr while printing the program name and +@c then running error_tail. The non-wide case just runs vfprintf; the +@c wide case converts the message to an alloca/malloc-allocated buffer +@c with mbsrtowcs, then prints it with vfwprintf. Afterwards, +@c print_errno_message calls strerror_r and fxprintf. +The @code{error} function can be used to report general problems during +program execution. The @var{format} argument is a format string just +like those given to the @code{printf} family of functions. 
The +arguments required for the format can follow the @var{format} parameter. +Just like @code{perror}, @code{error} also can report an error code in +textual form. But unlike @code{perror} the error value is explicitly +passed to the function in the @var{errnum} parameter. This eliminates +the problem mentioned above that the error reporting function must be +called immediately after the function causing the error since otherwise +@code{errno} might have a different value. + +@code{error} first prints the program name. If the application +defines a global variable @code{error_print_progname} and points it to a +function, this function will be called to print the program name. +Otherwise the string from the global variable @code{program_name} is +used. The program name is followed by a colon and a space which in turn +is followed by the output produced by the format string. If the +@var{errnum} parameter is non-zero the format string output is followed +by a colon and a space, followed by the error message for the error code +@var{errnum}. In any case, the output is terminated with a newline. + +The output is directed to the @code{stderr} stream. If the +@code{stderr} wasn't oriented before the call it will be narrow-oriented +afterwards. + +The function will return unless the @var{status} parameter has a +non-zero value. In this case the function will call @code{exit} with +the @var{status} value for its parameter and therefore never return. If +@code{error} returns, the global variable @code{error_message_count} is +incremented by one to keep track of the number of errors reported. 
+@end deftypefun + +@comment error.h +@comment GNU +@deftypefun void error_at_line (int @var{status}, int @var{errnum}, const char *@var{fname}, unsigned int @var{lineno}, const char *@var{format}, @dots{}) +@safety{@prelim{}@mtunsafe{@mtasurace{:error_at_line/error_one_per_line} @mtslocale{}}@asunsafe{@asucorrupt{} @asuheap{} @asuintl{}}@acunsafe{@acucorrupt{/error_one_per_line}}} +@c The error_one_per_line variable is accessed (without any form of +@c synchronization, but since it's an int used once, it should be safe +@c enough) and, if this mode is enabled, static variables used to hold +@c the last printed file name and line number are accessed and modified +@c without synchronization; the update is not atomic and it occurs +@c before disabling cancellation, so it can be interrupted after only +@c one of the two variables is modified. After that, it's very much +@c like error. + +The @code{error_at_line} function is very similar to the @code{error} +function. The only differences are the additional parameters @var{fname} +and @var{lineno}. The handling of the other parameters is identical to +that of @code{error} except that between the program name and the string +generated by the format string additional text is inserted. + +Directly following the program name a colon, followed by the file name +pointed to by @var{fname}, another colon, and the value of @var{lineno} are +printed. + +This additional output of course is meant to be used to locate an error +in an input file (like a programming language source code file etc). + +If the global variable @code{error_one_per_line} is set to a non-zero +value @code{error_at_line} will avoid printing consecutive messages for +the same file and line. Repetitions which do not directly follow +each other are not caught. + +Just like @code{error} this function only returns if @var{status} is +zero. Otherwise @code{exit} is called with the non-zero value.
If +@code{error_at_line} returns, the global variable @code{error_message_count} is +incremented by one to keep track of the number of errors reported. +@end deftypefun + +As mentioned above, the @code{error} and @code{error_at_line} functions +can be customized by defining a variable named +@code{error_print_progname}. + +@comment error.h +@comment GNU +@deftypevar {void (*error_print_progname)} (void) +If the @code{error_print_progname} variable is defined to a non-zero +value the function pointed to is called by @code{error} or +@code{error_at_line}. It is expected to print the program name or do +something similarly useful. + +The function is expected to print to the @code{stderr} stream and +must be able to handle whatever orientation the stream has. + +The variable is global and shared by all threads. +@end deftypevar + +@comment error.h +@comment GNU +@deftypevar {unsigned int} error_message_count +The @code{error_message_count} variable is incremented whenever one of +the functions @code{error} or @code{error_at_line} returns. The +variable is global and shared by all threads. +@end deftypevar + +@comment error.h +@comment GNU +@deftypevar int error_one_per_line +The @code{error_one_per_line} variable influences only +@code{error_at_line}. Normally the @code{error_at_line} function +creates output for every invocation. If @code{error_one_per_line} is +set to a non-zero value @code{error_at_line} keeps track of the last +file name and line number for which an error was reported and avoids +directly following messages for the same file and line. This variable +is global and shared by all threads. +@end deftypevar + +@noindent +A program which reads some input file and reports errors in it could look +like this: + +@smallexample +@{ + char *line = NULL; + size_t len = 0; + unsigned int lineno = 0; + + error_message_count = 0; + while (! 
feof_unlocked (fp)) + @{ + ssize_t n = getline (&line, &len, fp); + if (n <= 0) + /* @r{End of file or error.} */ + break; + ++lineno; + + /* @r{Process the line.} */ + @dots{} + + if (@r{Detect error in line}) + error_at_line (0, errval, filename, lineno, + "some error text %s", some_variable); + @} + + if (error_message_count != 0) + error (EXIT_FAILURE, 0, "%u errors found", error_message_count); +@} +@end smallexample + +@code{error} and @code{error_at_line} are clearly the functions of +choice and enable the programmer to write applications which follow the +GNU coding standard. @Theglibc{} additionally contains functions which +are used in BSD for the same purpose. These functions are declared in +@file{err.h}. It is generally advised to not use these functions. They +are included only for compatibility. + +@comment err.h +@comment BSD +@deftypefun void warn (const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @ascuintl{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Just calls vwarn with the va_list. +The @code{warn} function is roughly equivalent to a call like +@smallexample + error (0, errno, format, @r{the parameters}) +@end smallexample +@noindent +except that the global variables @code{error} respects and modifies +are not used. +@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void vwarn (const char *@var{format}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @ascuintl{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c While holding stderr's recursive lock, it prints the programname, the +@c given message, and the error string with fw?printf's %m. When the +@c stream is wide, convert_and_print converts the format string to an +@c alloca/malloc-created buffer using mbsrtowcs and then calls fwprintf. 
+The @code{vwarn} function is just like @code{warn} except that the +parameters for the handling of the format string @var{format} are passed +in as a value of type @code{va_list}. +@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void warnx (const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Same as warn, but without the strerror translation issues. +The @code{warnx} function is roughly equivalent to a call like +@smallexample + error (0, 0, format, @r{the parameters}) +@end smallexample +@noindent +except that the global variables @code{error} respects and modifies +are not used. The difference to @code{warn} is that no error number +string is printed. +@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void vwarnx (const char *@var{format}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Same as vwarn, but without the strerror translation issues. +The @code{vwarnx} function is just like @code{warnx} except that the +parameters for the handling of the format string @var{format} are passed +in as a value of type @code{va_list}. +@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void err (int @var{status}, const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @ascuintl{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Same as warn followed by exit. +The @code{err} function is roughly equivalent to a call like +@smallexample + error (status, errno, format, @r{the parameters}) +@end smallexample +@noindent +except that the global variables @code{error} respects and modifies +are not used and that the program is exited even if @var{status} is zero. 
+@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void verr (int @var{status}, const char *@var{format}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @ascuintl{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Same as vwarn followed by exit. +The @code{verr} function is just like @code{err} except that the +parameters for the handling of the format string @var{format} are passed +in as a value of type @code{va_list}. +@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void errx (int @var{status}, const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Same as warnx followed by exit. +The @code{errx} function is roughly equivalent to a call like +@smallexample + error (status, 0, format, @r{the parameters}) +@end smallexample +@noindent +except that the global variables @code{error} respects and modifies +are not used and that the program is exited even if @var{status} +is zero. The difference to @code{err} is that no error number +string is printed. +@end deftypefun + +@comment err.h +@comment BSD +@deftypefun void verrx (int @var{status}, const char *@var{format}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c Same as vwarnx followed by exit. +The @code{verrx} function is just like @code{errx} except that the +parameters for the handling of the format string @var{format} are passed +in as a value of type @code{va_list}. +@end deftypefun diff --git a/REORG.TODO/manual/examples/README b/REORG.TODO/manual/examples/README new file mode 100644 index 0000000000..7d0070fdd5 --- /dev/null +++ b/REORG.TODO/manual/examples/README @@ -0,0 +1,8 @@ +These are source files for example code that appears in The GNU C +Library Reference Manual. 
+ +While the manual itself is licensed under the terms of the GNU Free +Documentation License, you can use these source files on their own +under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License, or (at your +option) any later version. diff --git a/REORG.TODO/manual/examples/add.c b/REORG.TODO/manual/examples/add.c new file mode 100644 index 0000000000..9261f105da --- /dev/null +++ b/REORG.TODO/manual/examples/add.c @@ -0,0 +1,47 @@ +/* Example of a Variadic Function + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdarg.h> +#include <stdio.h> + +int +add_em_up (int count,...) +{ + va_list ap; + int i, sum; + + va_start (ap, count); /* Initialize the argument list. */ + + sum = 0; + for (i = 0; i < count; i++) + sum += va_arg (ap, int); /* Get the next argument value. */ + + va_end (ap); /* Clean up. */ + return sum; +} + +int +main (void) +{ + /* This call prints 16. */ + printf ("%d\n", add_em_up (3, 5, 5, 6)); + + /* This call prints 55. 
*/ + printf ("%d\n", add_em_up (10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + + return 0; +} diff --git a/REORG.TODO/manual/examples/argp-ex1.c b/REORG.TODO/manual/examples/argp-ex1.c new file mode 100644 index 0000000000..9058595e9b --- /dev/null +++ b/REORG.TODO/manual/examples/argp-ex1.c @@ -0,0 +1,31 @@ +/* Argp example #1 -- a minimal program using argp + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +/* This is (probably) the smallest possible program that + uses argp. It won't do much except give an error + message and exit when there are any arguments, and print + a (rather pointless) message for --help. */ + +#include <stdlib.h> +#include <argp.h> + +int +main (int argc, char **argv) +{ + argp_parse (0, argc, argv, 0, 0, 0); + exit (0); +} diff --git a/REORG.TODO/manual/examples/argp-ex2.c b/REORG.TODO/manual/examples/argp-ex2.c new file mode 100644 index 0000000000..83bb85b107 --- /dev/null +++ b/REORG.TODO/manual/examples/argp-ex2.c @@ -0,0 +1,61 @@ +/* Argp example #2 -- a pretty minimal program using argp + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +/* This program doesn't use any options or arguments, but uses + argp to be compliant with the GNU standard command line + format. + + In addition to making sure no arguments are given, and + implementing a --help option, this example will have a + --version option, and will put the given documentation string + and bug address in the --help output, as per GNU standards. + + The variable ARGP contains the argument parser specification; + adding fields to this structure is the way most parameters are + passed to argp_parse (the first three fields are usually used, + but not in this small program). There are also two global + variables that argp knows about defined here, + ARGP_PROGRAM_VERSION and ARGP_PROGRAM_BUG_ADDRESS (they are + global variables because they will almost always be constant + for a given program, even if it uses different argument + parsers for various tasks). */ + +#include <stdlib.h> +#include <argp.h> + +const char *argp_program_version = + "argp-ex2 1.0"; +const char *argp_program_bug_address = + "<bug-gnu-utils@@gnu.org>"; + +/* Program documentation. */ +static char doc[] = + "Argp example #2 -- a pretty minimal program using argp"; + +/* Our argument parser. The @code{options}, @code{parser}, and + @code{args_doc} fields are zero because we have neither options or + arguments; @code{doc} and @code{argp_program_bug_address} will be + used in the output for @samp{--help}, and the @samp{--version} + option will print out @code{argp_program_version}. 
*/ +static struct argp argp = { 0, 0, 0, doc }; + +int +main (int argc, char **argv) +{ + argp_parse (&argp, argc, argv, 0, 0, 0); + exit (0); +} diff --git a/REORG.TODO/manual/examples/argp-ex3.c b/REORG.TODO/manual/examples/argp-ex3.c new file mode 100644 index 0000000000..e37ab73408 --- /dev/null +++ b/REORG.TODO/manual/examples/argp-ex3.c @@ -0,0 +1,169 @@ +/* Argp example #3 -- a program with options and arguments using argp + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +/* This program uses the same features as example 2, and uses options and + arguments. 
+ + We now use the first four fields in ARGP, so here's a description of them: + OPTIONS -- A pointer to a vector of struct argp_option (see below) + PARSER -- A function to parse a single option, called by argp + ARGS_DOC -- A string describing how the non-option arguments should look + DOC -- A descriptive string about this program; if it contains a + vertical tab character (\v), the part after it will be + printed *following* the options + + The function PARSER takes the following arguments: + KEY -- An integer specifying which option this is (taken + from the KEY field in each struct argp_option), or + a special key specifying something else; the only + special keys we use here are ARGP_KEY_ARG, meaning + a non-option argument, and ARGP_KEY_END, meaning + that all arguments have been parsed + ARG -- For an option KEY, the string value of its + argument, or NULL if it has none + STATE-- A pointer to a struct argp_state, containing + various useful information about the parsing state; used here + are the INPUT field, which reflects the INPUT argument to + argp_parse, and the ARG_NUM field, which is the number of the + current non-option argument being parsed + It should return either 0, meaning success, ARGP_ERR_UNKNOWN, meaning the + given KEY wasn't recognized, or an errno value indicating some other + error. + + Note that in this example, main uses a structure to communicate with the + parse_opt function, a pointer to which it passes in the INPUT argument to + argp_parse. Of course, it's also possible to use global variables + instead, but this is somewhat more flexible. 
+ + The OPTIONS field contains a pointer to a vector of struct argp_option's; + that structure has the following fields (if you assign your option + structures using array initialization like this example, unspecified + fields will be defaulted to 0, and need not be specified): + NAME -- The name of this option's long option (may be zero) + KEY -- The KEY to pass to the PARSER function when parsing this option, + *and* the name of this option's short option, if it is a + printable ascii character + ARG -- The name of this option's argument, if any + FLAGS -- Flags describing this option; some of them are: + OPTION_ARG_OPTIONAL -- The argument to this option is optional + OPTION_ALIAS -- This option is an alias for the + previous option + OPTION_HIDDEN -- Don't show this option in --help output + DOC -- A documentation string for this option, shown in --help output + + An options vector should be terminated by an option with all fields zero. */ + +#include <stdlib.h> +#include <argp.h> + +const char *argp_program_version = + "argp-ex3 1.0"; +const char *argp_program_bug_address = + "<bug-gnu-utils@@gnu.org>"; + +/* Program documentation. */ +static char doc[] = + "Argp example #3 -- a program with options and arguments using argp"; + +/* A description of the arguments we accept. */ +static char args_doc[] = "ARG1 ARG2"; + +/* The options we understand. */ +static struct argp_option options[] = { + {"verbose", 'v', 0, 0, "Produce verbose output" }, + {"quiet", 'q', 0, 0, "Don't produce any output" }, + {"silent", 's', 0, OPTION_ALIAS }, + {"output", 'o', "FILE", 0, + "Output to FILE instead of standard output" }, + { 0 } +}; + +/* Used by @code{main} to communicate with @code{parse_opt}. */ +struct arguments +{ + char *args[2]; /* @var{arg1} & @var{arg2} */ + int silent, verbose; + char *output_file; +}; + +/* Parse a single option. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + /* Get the @var{input} argument from @code{argp_parse}, which we + know is a pointer to our arguments structure. */ + struct arguments *arguments = state->input; + + switch (key) + { + case 'q': case 's': + arguments->silent = 1; + break; + case 'v': + arguments->verbose = 1; + break; + case 'o': + arguments->output_file = arg; + break; + + case ARGP_KEY_ARG: + if (state->arg_num >= 2) + /* Too many arguments. */ + argp_usage (state); + + arguments->args[state->arg_num] = arg; + + break; + + case ARGP_KEY_END: + if (state->arg_num < 2) + /* Not enough arguments. */ + argp_usage (state); + break; + + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* Our argp parser. */ +static struct argp argp = { options, parse_opt, args_doc, doc }; + +int +main (int argc, char **argv) +{ + struct arguments arguments; + + /* Default values. */ + arguments.silent = 0; + arguments.verbose = 0; + arguments.output_file = "-"; + + /* Parse our arguments; every option seen by @code{parse_opt} will + be reflected in @code{arguments}. */ + argp_parse (&argp, argc, argv, 0, 0, &arguments); + + printf ("ARG1 = %s\nARG2 = %s\nOUTPUT_FILE = %s\n" + "VERBOSE = %s\nSILENT = %s\n", + arguments.args[0], arguments.args[1], + arguments.output_file, + arguments.verbose ? "yes" : "no", + arguments.silent ? "yes" : "no"); + + exit (0); +} diff --git a/REORG.TODO/manual/examples/argp-ex4.c b/REORG.TODO/manual/examples/argp-ex4.c new file mode 100644 index 0000000000..37218af23e --- /dev/null +++ b/REORG.TODO/manual/examples/argp-ex4.c @@ -0,0 +1,183 @@ +/* Argp example #4 -- a program with somewhat more complicated options + Copyright (C) 1991-2017 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +/* This program uses the same features as example 3, but has more + options, and somewhat more structure in the --help output. It + also shows how you can `steal' the remainder of the input + arguments past a certain point, for programs that accept a + list of items. It also shows the special argp KEY value + ARGP_KEY_NO_ARGS, which is only given if no non-option + arguments were supplied to the program. + + For structuring the help output, two features are used, + *headers* which are entries in the options vector with the + first four fields being zero, and a two part documentation + string (in the variable DOC), which allows documentation both + before and after the options; the two parts of DOC are + separated by a vertical-tab character ('\v', or '\013'). By + convention, the documentation before the options is just a + short string saying what the program does, and that afterwards + is longer, describing the behavior in more detail. All + documentation strings are automatically filled for output, + although newlines may be included to force a line break at a + particular point. All documentation strings are also passed to + the `gettext' function, for possible translation into the + current locale. 
*/ + +#include <stdlib.h> +#include <error.h> +#include <argp.h> + +const char *argp_program_version = + "argp-ex4 1.0"; +const char *argp_program_bug_address = + "<bug-gnu-utils@@prep.ai.mit.edu>"; + +/* Program documentation. */ +static char doc[] = + "Argp example #4 -- a program with somewhat more complicated\ +options\ +\vThis part of the documentation comes *after* the options;\ + note that the text is automatically filled, but it's possible\ + to force a line-break, e.g.\n<-- here."; + +/* A description of the arguments we accept. */ +static char args_doc[] = "ARG1 [STRING...]"; + +/* Keys for options without short-options. */ +#define OPT_ABORT 1 /* --abort */ + +/* The options we understand. */ +static struct argp_option options[] = { + {"verbose", 'v', 0, 0, "Produce verbose output" }, + {"quiet", 'q', 0, 0, "Don't produce any output" }, + {"silent", 's', 0, OPTION_ALIAS }, + {"output", 'o', "FILE", 0, + "Output to FILE instead of standard output" }, + + {0,0,0,0, "The following options should be grouped together:" }, + {"repeat", 'r', "COUNT", OPTION_ARG_OPTIONAL, + "Repeat the output COUNT (default 10) times"}, + {"abort", OPT_ABORT, 0, 0, "Abort before showing any output"}, + + { 0 } +}; + +/* Used by @code{main} to communicate with @code{parse_opt}. */ +struct arguments +{ + char *arg1; /* @var{arg1} */ + char **strings; /* [@var{string}@dots{}] */ + int silent, verbose, abort; /* @samp{-s}, @samp{-v}, @samp{--abort} */ + char *output_file; /* @var{file} arg to @samp{--output} */ + int repeat_count; /* @var{count} arg to @samp{--repeat} */ +}; + +/* Parse a single option. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + /* Get the @code{input} argument from @code{argp_parse}, which we + know is a pointer to our arguments structure. 
*/ + struct arguments *arguments = state->input; + + switch (key) + { + case 'q': case 's': + arguments->silent = 1; + break; + case 'v': + arguments->verbose = 1; + break; + case 'o': + arguments->output_file = arg; + break; + case 'r': + arguments->repeat_count = arg ? atoi (arg) : 10; + break; + case OPT_ABORT: + arguments->abort = 1; + break; + + case ARGP_KEY_NO_ARGS: + argp_usage (state); + + case ARGP_KEY_ARG: + /* Here we know that @code{state->arg_num == 0}, since we + force argument parsing to end before any more arguments can + get here. */ + arguments->arg1 = arg; + + /* Now we consume all the rest of the arguments. + @code{state->next} is the index in @code{state->argv} of the + next argument to be parsed, which is the first @var{string} + we're interested in, so we can just use + @code{&state->argv[state->next]} as the value for + arguments->strings. + + @emph{In addition}, by setting @code{state->next} to the end + of the arguments, we can force argp to stop parsing here and + return. */ + arguments->strings = &state->argv[state->next]; + state->next = state->argc; + + break; + + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* Our argp parser. */ +static struct argp argp = { options, parse_opt, args_doc, doc }; + +int +main (int argc, char **argv) +{ + int i, j; + struct arguments arguments; + + /* Default values. */ + arguments.silent = 0; + arguments.verbose = 0; + arguments.output_file = "-"; + arguments.repeat_count = 1; + arguments.abort = 0; + + /* Parse our arguments; every option seen by @code{parse_opt} will be + reflected in @code{arguments}. */ + argp_parse (&argp, argc, argv, 0, 0, &arguments); + + if (arguments.abort) + error (10, 0, "ABORTED"); + + for (i = 0; i < arguments.repeat_count; i++) + { + printf ("ARG1 = %s\n", arguments.arg1); + printf ("STRINGS = "); + for (j = 0; arguments.strings[j]; j++) + printf (j == 0 ? 
"%s" : ", %s", arguments.strings[j]); + printf ("\n"); + printf ("OUTPUT_FILE = %s\nVERBOSE = %s\nSILENT = %s\n", + arguments.output_file, + arguments.verbose ? "yes" : "no", + arguments.silent ? "yes" : "no"); + } + + exit (0); +} diff --git a/REORG.TODO/manual/examples/atexit.c b/REORG.TODO/manual/examples/atexit.c new file mode 100644 index 0000000000..fe14930ef8 --- /dev/null +++ b/REORG.TODO/manual/examples/atexit.c @@ -0,0 +1,32 @@ +/* Cleanups on Exit + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdio.h> +#include <stdlib.h> + +void +bye (void) +{ + puts ("Goodbye, cruel world...."); +} + +int +main (void) +{ + atexit (bye); + exit (EXIT_SUCCESS); +} diff --git a/REORG.TODO/manual/examples/db.c b/REORG.TODO/manual/examples/db.c new file mode 100644 index 0000000000..652662682d --- /dev/null +++ b/REORG.TODO/manual/examples/db.c @@ -0,0 +1,69 @@ +/* User and Group Database Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <grp.h> +#include <pwd.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdlib.h> + +int +main (void) +{ + uid_t me; + struct passwd *my_passwd; + struct group *my_group; + char **members; + + /* Get information about the user ID. */ + me = getuid (); + my_passwd = getpwuid (me); + if (!my_passwd) + { + printf ("Couldn't find out about user %d.\n", (int) me); + exit (EXIT_FAILURE); + } + + /* Print the information. */ + printf ("I am %s.\n", my_passwd->pw_gecos); + printf ("My login name is %s.\n", my_passwd->pw_name); + printf ("My uid is %d.\n", (int) (my_passwd->pw_uid)); + printf ("My home directory is %s.\n", my_passwd->pw_dir); + printf ("My default shell is %s.\n", my_passwd->pw_shell); + + /* Get information about the default group ID. */ + my_group = getgrgid (my_passwd->pw_gid); + if (!my_group) + { + printf ("Couldn't find out about group %d.\n", + (int) my_passwd->pw_gid); + exit (EXIT_FAILURE); + } + + /* Print the information. */ + printf ("My default group is %s (%d).\n", + my_group->gr_name, (int) (my_passwd->pw_gid)); + printf ("The members of this group are:\n"); + members = my_group->gr_mem; + while (*members) + { + printf (" %s\n", *(members)); + members++; + } + + return EXIT_SUCCESS; +} diff --git a/REORG.TODO/manual/examples/dir.c b/REORG.TODO/manual/examples/dir.c new file mode 100644 index 0000000000..da6eef2a46 --- /dev/null +++ b/REORG.TODO/manual/examples/dir.c @@ -0,0 +1,41 @@ +/* Simple Program to List a Directory + Copyright (C) 1991-2017 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +/*@group*/ +#include <stdio.h> +#include <sys/types.h> +#include <dirent.h> +/*@end group*/ + +int +main (void) +{ + DIR *dp; + struct dirent *ep; + + dp = opendir ("./"); + if (dp != NULL) + { + while (ep = readdir (dp)) + puts (ep->d_name); + (void) closedir (dp); + } + else + perror ("Couldn't open the directory"); + + return 0; +} diff --git a/REORG.TODO/manual/examples/dir2.c b/REORG.TODO/manual/examples/dir2.c new file mode 100644 index 0000000000..a8390822b4 --- /dev/null +++ b/REORG.TODO/manual/examples/dir2.c @@ -0,0 +1,46 @@ +/* Simple Program to List a Directory, Mark II + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. 
+*/
+
+/*@group*/
+#include <stdio.h>
+#include <dirent.h>
+/*@end group*/
+
+/* Filter for scandir that accepts every directory entry.  */
+static int
+one (const struct dirent *unused)
+{
+  return 1;
+}
+
+/* Print the entries of the current directory in alphasort order.  */
+int
+main (void)
+{
+  struct dirent **eps;
+  int n;
+
+  n = scandir ("./", &eps, one, alphasort);
+  if (n >= 0)
+    {
+      int cnt;
+      for (cnt = 0; cnt < n; ++cnt)
+        puts (eps[cnt]->d_name);
+    }
+  else
+    perror ("Couldn't open the directory");
+
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/execinfo.c b/REORG.TODO/manual/examples/execinfo.c
new file mode 100644
index 0000000000..c357fa48ed
--- /dev/null
+++ b/REORG.TODO/manual/examples/execinfo.c
@@ -0,0 +1,54 @@
+/* Obtain a backtrace and print it.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <execinfo.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Obtain a backtrace and print it to @code{stdout}.  */
+void
+print_trace (void)
+{
+  void *array[10];
+  char **strings;
+  /* backtrace returns, and backtrace_symbols takes, an int count.  */
+  int size, i;
+
+  size = backtrace (array, 10);
+  strings = backtrace_symbols (array, size);
+  /* backtrace_symbols returns a null pointer if it cannot allocate
+     the result, so only walk the array when it exists.  */
+  if (strings != NULL)
+    {
+      printf ("Obtained %d stack frames.\n", size);
+
+      for (i = 0; i < size; i++)
+        printf ("%s\n", strings[i]);
+    }
+
+  free (strings);
+}
+
+/* A dummy function to make the backtrace more interesting.
*/
+void
+dummy_function (void)
+{
+  print_trace ();
+}
+
+int
+main (void)
+{
+  dummy_function ();
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/filecli.c b/REORG.TODO/manual/examples/filecli.c
new file mode 100644
index 0000000000..50f4ef43ac
--- /dev/null
+++ b/REORG.TODO/manual/examples/filecli.c
@@ -0,0 +1,71 @@
+/* Example of Reading Datagrams
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stddef.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#define SERVER "/tmp/serversocket"
+#define CLIENT "/tmp/mysocket"
+#define MAXMSG 512
+#define MESSAGE "Yow!!! Are we having fun yet?!?"
+
+/* Send MESSAGE as a datagram to the local-namespace socket SERVER
+   and wait for the reply.  */
+int
+main (void)
+{
+  extern int make_named_socket (const char *name);
+  int sock;
+  char message[MAXMSG];
+  struct sockaddr_un name;
+  size_t size;
+  int nbytes;
+
+  /* Make the socket.  */
+  sock = make_named_socket (CLIENT);
+
+  /* Initialize the server socket address.  The address length is the
+     offset of the start of the filename plus its length, which does
+     not depend on what precedes sun_path in the structure.  */
+  name.sun_family = AF_LOCAL;
+  strcpy (name.sun_path, SERVER);
+  size = offsetof (struct sockaddr_un, sun_path) + strlen (name.sun_path);
+
+  /* Send the datagram.  */
+  nbytes = sendto (sock, MESSAGE, strlen (MESSAGE) + 1, 0,
+                   (struct sockaddr *) & name, size);
+  if (nbytes < 0)
+    {
+      perror ("sendto (client)");
+      exit (EXIT_FAILURE);
+    }
+
+  /* Wait for a reply.
*/
+  nbytes = recvfrom (sock, message, MAXMSG, 0, NULL, NULL);
+  if (nbytes < 0)
+    {
+      perror ("recvfrom (client)");
+      exit (EXIT_FAILURE);
+    }
+
+  /* Print a diagnostic message.  The reply is not guaranteed to be
+     null-terminated, so bound the output by the byte count.  */
+  fprintf (stderr, "Client: got message: %.*s\n", nbytes, message);
+
+  /* Clean up.  */
+  remove (CLIENT);
+  close (sock);
+  return EXIT_SUCCESS;
+}
diff --git a/REORG.TODO/manual/examples/filesrv.c b/REORG.TODO/manual/examples/filesrv.c
new file mode 100644
index 0000000000..9cf3d0ee91
--- /dev/null
+++ b/REORG.TODO/manual/examples/filesrv.c
@@ -0,0 +1,65 @@
+/* Datagram Socket Example
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#define SERVER "/tmp/serversocket"
+#define MAXMSG 512
+
+/* Receive datagrams on the local-namespace socket SERVER and bounce
+   each one back to its sender, forever.  */
+int
+main (void)
+{
+  extern int make_named_socket (const char *name);
+  int sock;
+  char message[MAXMSG];
+  struct sockaddr_un name;
+  /* recvfrom takes a pointer to socklen_t for the address length.  */
+  socklen_t size;
+  int nbytes;
+
+  /* Remove the filename first, it's ok if the call fails */
+  unlink (SERVER);
+
+  /* Make the socket, then loop endlessly.  */
+  sock = make_named_socket (SERVER);
+  while (1)
+    {
+      /* Wait for a datagram.  */
+      size = sizeof (name);
+      nbytes = recvfrom (sock, message, MAXMSG, 0,
+                         (struct sockaddr *) & name, &size);
+      if (nbytes < 0)
+        {
+          perror ("recvfrom (server)");
+          exit (EXIT_FAILURE);
+        }
+
+      /* Give a diagnostic message.
*/
+      /* The datagram need not be null-terminated, so bound the output
+         by the number of bytes actually received.  */
+      fprintf (stderr, "Server: got message: %.*s\n", nbytes, message);
+
+      /* Bounce the message back to the sender.  */
+      nbytes = sendto (sock, message, nbytes, 0,
+                       (struct sockaddr *) & name, size);
+      if (nbytes < 0)
+        {
+          perror ("sendto (server)");
+          exit (EXIT_FAILURE);
+        }
+    }
+}
diff --git a/REORG.TODO/manual/examples/fmtmsgexpl.c b/REORG.TODO/manual/examples/fmtmsgexpl.c
new file mode 100644
index 0000000000..5cd965cd3f
--- /dev/null
+++ b/REORG.TODO/manual/examples/fmtmsgexpl.c
@@ -0,0 +1,29 @@
+/* How to use fmtmsg and addseverity.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <fmtmsg.h>
+
+int
+main (void)
+{
+  addseverity (5, "NOTE2");
+  fmtmsg (MM_PRINT, "only1field", MM_INFO, "text2", "action2", "tag2");
+  fmtmsg (MM_PRINT, "UX:cat", 5, "invalid syntax", "refer to manual",
+          "UX:cat:001");
+  fmtmsg (MM_PRINT, "label:foo", 6, "text", "action", "tag");
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/genpass.c b/REORG.TODO/manual/examples/genpass.c
new file mode 100644
index 0000000000..7d76dedfbc
--- /dev/null
+++ b/REORG.TODO/manual/examples/genpass.c
@@ -0,0 +1,49 @@
+/* Encrypting Passwords
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+#include <crypt.h>
+
+/* Read a password from the terminal and print it hashed with a
+   freshly generated salt.  */
+int
+main(void)
+{
+  unsigned long seed[2];
+  char salt[] = "$1$........";
+  const char *const seedchars =
+    "./0123456789ABCDEFGHIJKLMNOPQRST"
+    "UVWXYZabcdefghijklmnopqrstuvwxyz";
+  char *input;
+  char *password;
+  int i;
+
+  /* Generate a (not very) random seed.
+     You should do it better than this...  */
+  seed[0] = time(NULL);
+  seed[1] = getpid() ^ (seed[0] >> 14 & 0x30000);
+
+  /* Turn it into printable characters from `seedchars'.  */
+  for (i = 0; i < 8; i++)
+    salt[3+i] = seedchars[(seed[i/5] >> (i%5)*6) & 0x3f];
+
+  /* Read in the user's password and encrypt it.  Both calls return a
+     null pointer on failure, which must not reach crypt or puts.  */
+  input = getpass("Password:");
+  if (input == NULL)
+    {
+      perror("getpass");
+      return 1;
+    }
+  password = crypt(input, salt);
+  if (password == NULL)
+    {
+      perror("crypt");
+      return 1;
+    }
+
+  /* Print the results.  */
+  puts(password);
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/inetcli.c b/REORG.TODO/manual/examples/inetcli.c
new file mode 100644
index 0000000000..1723e2fc56
--- /dev/null
+++ b/REORG.TODO/manual/examples/inetcli.c
@@ -0,0 +1,76 @@
+/* Byte Stream Socket Example
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#define PORT 5555
+#define MESSAGE "Yow!!! Are we having fun yet?!?"
+#define SERVERHOST "www.gnu.org"
+
+/* Write MESSAGE, including its terminating null byte, to FILEDES;
+   exit on failure.  */
+void
+write_to_server (int filedes)
+{
+  int nbytes;
+
+  nbytes = write (filedes, MESSAGE, strlen (MESSAGE) + 1);
+  if (nbytes < 0)
+    {
+      perror ("write");
+      exit (EXIT_FAILURE);
+    }
+}
+
+
+/* Connect a stream socket to SERVERHOST on PORT and send MESSAGE.  */
+int
+main (void)
+{
+  extern void init_sockaddr (struct sockaddr_in *name,
+                             const char *hostname,
+                             uint16_t port);
+  int sock;
+  struct sockaddr_in servername;
+
+  /* Create the socket.  */
+  sock = socket (PF_INET, SOCK_STREAM, 0);
+  if (sock < 0)
+    {
+      perror ("socket (client)");
+      exit (EXIT_FAILURE);
+    }
+
+  /* Connect to the server.  */
+  init_sockaddr (&servername, SERVERHOST, PORT);
+  if (0 > connect (sock,
+                   (struct sockaddr *) &servername,
+                   sizeof (servername)))
+    {
+      perror ("connect (client)");
+      exit (EXIT_FAILURE);
+    }
+
+  /* Send data to the server.  */
+  write_to_server (sock);
+  close (sock);
+  exit (EXIT_SUCCESS);
+}
diff --git a/REORG.TODO/manual/examples/inetsrv.c b/REORG.TODO/manual/examples/inetsrv.c
new file mode 100644
index 0000000000..01ae631818
--- /dev/null
+++ b/REORG.TODO/manual/examples/inetsrv.c
@@ -0,0 +1,120 @@
+/* Byte Stream Connection Server Example
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+
+#define PORT 5555
+#define MAXMSG 512
+
+/* Read one chunk of data from FILEDES and report it on stderr.
+   Return 0 if data was read and -1 on end-of-file; exit on a read
+   error.  */
+int
+read_from_client (int filedes)
+{
+  char buffer[MAXMSG];
+  int nbytes;
+
+  nbytes = read (filedes, buffer, MAXMSG);
+  if (nbytes < 0)
+    {
+      /* Read error.  */
+      perror ("read");
+      exit (EXIT_FAILURE);
+    }
+  else if (nbytes == 0)
+    /* End-of-file.  */
+    return -1;
+  else
+    {
+      /* Data read.  read does not null-terminate the buffer, so bound
+         the output by the byte count.  */
+      fprintf (stderr, "Server: got message: `%.*s'\n", nbytes, buffer);
+      return 0;
+    }
+}
+
+int
+main (void)
+{
+  extern int make_socket (uint16_t port);
+  int sock;
+  fd_set active_fd_set, read_fd_set;
+  int i;
+  struct sockaddr_in clientname;
+  /* accept takes a pointer to socklen_t for the address length.  */
+  socklen_t size;
+
+  /* Create the socket and set it up to accept connections.  */
+  sock = make_socket (PORT);
+  if (listen (sock, 1) < 0)
+    {
+      perror ("listen");
+      exit (EXIT_FAILURE);
+    }
+
+  /* Initialize the set of active sockets.  */
+  FD_ZERO (&active_fd_set);
+  FD_SET (sock, &active_fd_set);
+
+  while (1)
+    {
+      /* Block until input arrives on one or more active sockets.  */
+      read_fd_set = active_fd_set;
+      if (select (FD_SETSIZE, &read_fd_set, NULL, NULL, NULL) < 0)
+        {
+          perror ("select");
+          exit (EXIT_FAILURE);
+        }
+
+      /* Service all the sockets with input pending.
*/
+      for (i = 0; i < FD_SETSIZE; ++i)
+        if (FD_ISSET (i, &read_fd_set))
+          {
+            if (i == sock)
+              {
+                /* Connection request on original socket.  */
+                int new;
+                size = sizeof (clientname);
+                new = accept (sock,
+                              (struct sockaddr *) &clientname,
+                              &size);
+                if (new < 0)
+                  {
+                    perror ("accept");
+                    exit (EXIT_FAILURE);
+                  }
+                /* Port numbers are unsigned 16-bit values; a signed
+                   conversion would show ports above 32767 as negative.  */
+                fprintf (stderr,
+                         "Server: connect from host %s, port %hu.\n",
+                         inet_ntoa (clientname.sin_addr),
+                         ntohs (clientname.sin_port));
+                FD_SET (new, &active_fd_set);
+              }
+            else
+              {
+                /* Data arriving on an already-connected socket.  */
+                if (read_from_client (i) < 0)
+                  {
+                    close (i);
+                    FD_CLR (i, &active_fd_set);
+                  }
+              }
+          }
+    }
+}
diff --git a/REORG.TODO/manual/examples/isockad.c b/REORG.TODO/manual/examples/isockad.c
new file mode 100644
index 0000000000..be2def3be8
--- /dev/null
+++ b/REORG.TODO/manual/examples/isockad.c
@@ -0,0 +1,40 @@
+/* Internet Socket Example using sockaddr_in.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netdb.h> + +void +init_sockaddr (struct sockaddr_in *name, + const char *hostname, + uint16_t port) +{ + struct hostent *hostinfo; + + name->sin_family = AF_INET; + name->sin_port = htons (port); + hostinfo = gethostbyname (hostname); + if (hostinfo == NULL) + { + fprintf (stderr, "Unknown host %s.\n", hostname); + exit (EXIT_FAILURE); + } + name->sin_addr = *(struct in_addr *) hostinfo->h_addr; +} diff --git a/REORG.TODO/manual/examples/longopt.c b/REORG.TODO/manual/examples/longopt.c new file mode 100644 index 0000000000..40967174aa --- /dev/null +++ b/REORG.TODO/manual/examples/longopt.c @@ -0,0 +1,113 @@ +/* Example of Parsing Long Options with getopt_long. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <getopt.h> + +/* Flag set by @samp{--verbose}. */ +static int verbose_flag; + +int +main (int argc, char **argv) +{ + int c; + + while (1) + { + static struct option long_options[] = + { + /* These options set a flag. */ + {"verbose", no_argument, &verbose_flag, 1}, + {"brief", no_argument, &verbose_flag, 0}, + /* These options don't set a flag. + We distinguish them by their indices.
*/
+          {"add",     no_argument,       0, 'a'},
+          {"append",  no_argument,       0, 'b'},
+          {"delete",  required_argument, 0, 'd'},
+          {"create",  required_argument, 0, 'c'},
+          {"file",    required_argument, 0, 'f'},
+          {0, 0, 0, 0}
+        };
+      /* @code{getopt_long} stores the option index here.  */
+      int option_index = 0;
+
+      c = getopt_long (argc, argv, "abc:d:f:",
+                       long_options, &option_index);
+
+      /* Detect the end of the options.  */
+      if (c == -1)
+        break;
+
+      switch (c)
+        {
+        case 0:
+          /* If this option set a flag, do nothing else now.  */
+          if (long_options[option_index].flag != 0)
+            break;
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          break;
+
+        case 'a':
+          /* puts appends the newline itself.  */
+          puts ("option -a");
+          break;
+
+        case 'b':
+          puts ("option -b");
+          break;
+
+        case 'c':
+          printf ("option -c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option -d with value `%s'\n", optarg);
+          break;
+
+        case 'f':
+          printf ("option -f with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          /* @code{getopt_long} already printed an error message.  */
+          break;
+
+        default:
+          abort ();
+        }
+    }
+
+  /* Instead of reporting @samp{--verbose}
+     and @samp{--brief} as they are encountered,
+     we report the final status resulting from them.  */
+  if (verbose_flag)
+    puts ("verbose flag is set");
+
+  /* Print any remaining command line arguments (not options).  */
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      putchar ('\n');
+    }
+
+  exit (0);
+}
diff --git a/REORG.TODO/manual/examples/memopen.c b/REORG.TODO/manual/examples/memopen.c
new file mode 100644
index 0000000000..407aeba878
--- /dev/null
+++ b/REORG.TODO/manual/examples/memopen.c
@@ -0,0 +1,34 @@
+/* String Streams
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+static char buffer[] = "foobar";
+
+/* Open BUFFER as a read-only stream and print it one character at a
+   time.  */
+int
+main (void)
+{
+  int ch;
+  FILE *stream;
+
+  stream = fmemopen (buffer, strlen (buffer), "r");
+  /* fmemopen returns a null pointer on failure.  */
+  if (stream == NULL)
+    {
+      perror ("fmemopen");
+      return 1;
+    }
+  while ((ch = fgetc (stream)) != EOF)
+    printf ("Got %c\n", ch);
+  fclose (stream);
+
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/memstrm.c b/REORG.TODO/manual/examples/memstrm.c
new file mode 100644
index 0000000000..d339ffb95d
--- /dev/null
+++ b/REORG.TODO/manual/examples/memstrm.c
@@ -0,0 +1,36 @@
+/* open_memstream example.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/ + +#include <stdio.h> + +int +main (void) +{ + char *bp; + size_t size; + FILE *stream; + + stream = open_memstream (&bp, &size); + fprintf (stream, "hello"); + fflush (stream); + printf ("buf = `%s', size = %zu\n", bp, size); + fprintf (stream, ", world"); + fclose (stream); + printf ("buf = `%s', size = %zu\n", bp, size); + + return 0; +} diff --git a/REORG.TODO/manual/examples/mkdirent.c b/REORG.TODO/manual/examples/mkdirent.c new file mode 100644 index 0000000000..988e988460 --- /dev/null +++ b/REORG.TODO/manual/examples/mkdirent.c @@ -0,0 +1,42 @@ +/* Example for creating a struct dirent object for use with glob. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <dirent.h> +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +struct dirent * +mkdirent (const char *name) +{ + size_t dirent_size = offsetof (struct dirent, d_name) + 1; + size_t name_length = strlen (name); + size_t total_size = dirent_size + name_length; + if (total_size < dirent_size) + { + errno = ENOMEM; + return NULL; + } + struct dirent *result = malloc (total_size); + if (result == NULL) + return NULL; + result->d_type = DT_UNKNOWN; + result->d_ino = 1; /* Do not skip this entry.
*/ + memcpy (result->d_name, name, name_length + 1); + return result; +} diff --git a/REORG.TODO/manual/examples/mkfsock.c b/REORG.TODO/manual/examples/mkfsock.c new file mode 100644 index 0000000000..ffe1b46928 --- /dev/null +++ b/REORG.TODO/manual/examples/mkfsock.c @@ -0,0 +1,62 @@ +/* Example of Local-Namespace Sockets + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stddef.h> +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/un.h> + +int +make_named_socket (const char *filename) +{ + struct sockaddr_un name; + int sock; + size_t size; + + /* Create the socket. */ + sock = socket (PF_LOCAL, SOCK_DGRAM, 0); + if (sock < 0) + { + perror ("socket"); + exit (EXIT_FAILURE); + } + + /* Bind a name to the socket. */ + name.sun_family = AF_LOCAL; + strncpy (name.sun_path, filename, sizeof (name.sun_path)); + name.sun_path[sizeof (name.sun_path) - 1] = '\0'; + + /* The size of the address is + the offset of the start of the filename, + plus its length (not including the terminating null byte).
+ Alternatively you can just do: + size = SUN_LEN (&name); + */ + size = (offsetof (struct sockaddr_un, sun_path) + strlen (name.sun_path)); + + if (bind (sock, (struct sockaddr *) &name, size) < 0) + { + perror ("bind"); + exit (EXIT_FAILURE); + } + + return sock; +} diff --git a/REORG.TODO/manual/examples/mkisock.c b/REORG.TODO/manual/examples/mkisock.c new file mode 100644 index 0000000000..5657322631 --- /dev/null +++ b/REORG.TODO/manual/examples/mkisock.c @@ -0,0 +1,48 @@ +/* Internet Socket Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <netinet/in.h> + +int +make_socket (uint16_t port) +{ + int sock; + struct sockaddr_in name; + + /* Create the socket. */ + sock = socket (PF_INET, SOCK_STREAM, 0); + if (sock < 0) + { + perror ("socket"); + exit (EXIT_FAILURE); + } + + /* Give the socket a name.
*/
+  name.sin_family = AF_INET;
+  name.sin_port = htons (port);
+  name.sin_addr.s_addr = htonl (INADDR_ANY);
+  if (bind (sock, (struct sockaddr *) &name, sizeof (name)) < 0)
+    {
+      perror ("bind");
+      exit (EXIT_FAILURE);
+    }
+
+  return sock;
+}
diff --git a/REORG.TODO/manual/examples/mygetpass.c b/REORG.TODO/manual/examples/mygetpass.c
new file mode 100644
index 0000000000..152d7bee7f
--- /dev/null
+++ b/REORG.TODO/manual/examples/mygetpass.c
@@ -0,0 +1,42 @@
+/* Reading Passwords
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <termios.h>
+#include <stdio.h>
+
+/* Read a line from STREAM into *LINEPTR (as getline does) with
+   terminal echo turned off.  Return the getline result, or -1 if the
+   terminal attributes cannot be read or changed.  */
+ssize_t
+my_getpass (char **lineptr, size_t *n, FILE *stream)
+{
+  struct termios old, new;
+  /* getline returns ssize_t; keep the full width of its result.  */
+  ssize_t nread;
+
+  /* Turn echoing off and fail if we can't.  */
+  if (tcgetattr (fileno (stream), &old) != 0)
+    return -1;
+  new = old;
+  new.c_lflag &= ~ECHO;
+  if (tcsetattr (fileno (stream), TCSAFLUSH, &new) != 0)
+    return -1;
+
+  /* Read the password.  */
+  nread = getline (lineptr, n, stream);
+
+  /* Restore terminal.
*/ + (void) tcsetattr (fileno (stream), TCSAFLUSH, &old); + + return nread; +} diff --git a/REORG.TODO/manual/examples/ofdlocks.c b/REORG.TODO/manual/examples/ofdlocks.c new file mode 100644 index 0000000000..42074105a8 --- /dev/null +++ b/REORG.TODO/manual/examples/ofdlocks.c @@ -0,0 +1,77 @@ +/* Open File Description Locks Usage Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
+*/
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+#define FILENAME "/tmp/foo"
+#define NUM_THREADS 3
+#define ITERATIONS 5
+
+/* Thread body: open FILENAME through a descriptor of its own and
+   append ITERATIONS lines to it, taking an open file description
+   write lock on the first byte around each append.  ARG carries the
+   thread number.  */
+void *
+thread_start (void *arg)
+{
+  int i, fd, len;
+  long tid = (long) arg;
+  char buf[256];
+  struct flock lck = {
+    .l_whence = SEEK_SET,
+    .l_start = 0,
+    .l_len = 1,
+  };
+
+  fd = open (FILENAME, O_RDWR | O_CREAT, 0666);
+  if (fd < 0)
+    {
+      /* Without a descriptor there is nothing to lock or write.  */
+      perror ("open");
+      pthread_exit (NULL);
+    }
+
+  for (i = 0; i < ITERATIONS; i++)
+    {
+      lck.l_type = F_WRLCK;
+      fcntl (fd, F_OFD_SETLKW, &lck);
+
+      len = sprintf (buf, "%d: tid=%ld fd=%d\n", i, tid, fd);
+
+      lseek (fd, 0, SEEK_END);
+      write (fd, buf, len);
+      fsync (fd);
+
+      lck.l_type = F_UNLCK;
+      fcntl (fd, F_OFD_SETLK, &lck);
+
+      /* sleep to ensure lock is yielded to another thread */
+      usleep (1);
+    }
+  close (fd);
+  pthread_exit (NULL);
+}
+
+/* Empty FILENAME, then start NUM_THREADS writer threads.  */
+int
+main (int argc, char **argv)
+{
+  long i;
+  pthread_t threads[NUM_THREADS];
+
+  truncate (FILENAME, 0);
+
+  for (i = 0; i < NUM_THREADS; i++)
+    pthread_create (&threads[i], NULL, thread_start, (void *) i);
+
+  /* Exit only the main thread so the writers can run to completion.  */
+  pthread_exit (NULL);
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/pipe.c b/REORG.TODO/manual/examples/pipe.c
new file mode 100644
index 0000000000..adfa4658dc
--- /dev/null
+++ b/REORG.TODO/manual/examples/pipe.c
@@ -0,0 +1,87 @@
+/* Creating a Pipe
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> + +/* Read characters from the pipe and echo them to @code{stdout}. */ + +void +read_from_pipe (int file) +{ + FILE *stream; + int c; + stream = fdopen (file, "r"); + while ((c = fgetc (stream)) != EOF) + putchar (c); + fclose (stream); +} + +/* Write some random text to the pipe. */ + +void +write_to_pipe (int file) +{ + FILE *stream; + stream = fdopen (file, "w"); + fprintf (stream, "hello, world!\n"); + fprintf (stream, "goodbye, world!\n"); + fclose (stream); +} + +int +main (void) +{ + pid_t pid; + int mypipe[2]; + +/*@group*/ + /* Create the pipe. */ + if (pipe (mypipe)) + { + fprintf (stderr, "Pipe failed.\n"); + return EXIT_FAILURE; + } +/*@end group*/ + + /* Create the child process. */ + pid = fork (); + if (pid == (pid_t) 0) + { + /* This is the child process. + Close other end first. */ + close (mypipe[1]); + read_from_pipe (mypipe[0]); + return EXIT_SUCCESS; + } + else if (pid < (pid_t) 0) + { + /* The fork failed. */ + fprintf (stderr, "Fork failed.\n"); + return EXIT_FAILURE; + } + else + { + /* This is the parent process. + Close other end first. */ + close (mypipe[0]); + write_to_pipe (mypipe[1]); + return EXIT_SUCCESS; + } +} diff --git a/REORG.TODO/manual/examples/popen.c b/REORG.TODO/manual/examples/popen.c new file mode 100644 index 0000000000..cc7617c65c --- /dev/null +++ b/REORG.TODO/manual/examples/popen.c @@ -0,0 +1,55 @@ +/* Pipe to a Subprocess + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdio.h> +#include <stdlib.h> + +void +write_data (FILE * stream) +{ + int i; + for (i = 0; i < 100; i++) + fprintf (stream, "%d\n", i); + if (ferror (stream)) + { + fprintf (stderr, "Output to stream failed.\n"); + exit (EXIT_FAILURE); + } +} + +/*@group*/ +int +main (void) +{ + FILE *output; + + output = popen ("more", "w"); + if (!output) + { + fprintf (stderr, + "incorrect parameters or too many files.\n"); + return EXIT_FAILURE; + } + write_data (output); + if (pclose (output) != 0) + { + fprintf (stderr, + "Could not run more or other error.\n"); + } + return EXIT_SUCCESS; +} +/*@end group*/ diff --git a/REORG.TODO/manual/examples/rprintf.c b/REORG.TODO/manual/examples/rprintf.c new file mode 100644 index 0000000000..e49a2c4fbf --- /dev/null +++ b/REORG.TODO/manual/examples/rprintf.c @@ -0,0 +1,84 @@ +/* Printf Extension Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <printf.h> + +/*@group*/ +typedef struct +{ + char *name; +} +Widget; +/*@end group*/ + +int +print_widget (FILE *stream, + const struct printf_info *info, + const void *const *args) +{ + const Widget *w; + char *buffer; + int len; + + /* Format the output into a string. */ + w = *((const Widget **) (args[0])); + len = asprintf (&buffer, "<Widget %p: %s>", w, w->name); + if (len == -1) + return -1; + + /* Pad to the minimum field width and print to the stream. */ + len = fprintf (stream, "%*s", + (info->left ? -info->width : info->width), + buffer); + + /* Clean up and return. */ + free (buffer); + return len; +} + + +int +print_widget_arginfo (const struct printf_info *info, size_t n, + int *argtypes) +{ + /* We always take exactly one argument and this is a pointer to the + structure.. */ + if (n > 0) + argtypes[0] = PA_POINTER; + return 1; +} + + +int +main (void) +{ + /* Make a widget to print. */ + Widget mywidget; + mywidget.name = "mywidget"; + + /* Register the print function for widgets. */ + register_printf_function ('W', print_widget, print_widget_arginfo); + + /* Now print the widget. */ + printf ("|%W|\n", &mywidget); + printf ("|%35W|\n", &mywidget); + printf ("|%-35W|\n", &mywidget); + + return 0; +} diff --git a/REORG.TODO/manual/examples/search.c b/REORG.TODO/manual/examples/search.c new file mode 100644 index 0000000000..c434c11f8c --- /dev/null +++ b/REORG.TODO/manual/examples/search.c @@ -0,0 +1,113 @@ +/* Searching and Sorting Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* Define an array of critters to sort. */ + +struct critter + { + const char *name; + const char *species; + }; + +struct critter muppets[] = + { + {"Kermit", "frog"}, + {"Piggy", "pig"}, + {"Gonzo", "whatever"}, + {"Fozzie", "bear"}, + {"Sam", "eagle"}, + {"Robin", "frog"}, + {"Animal", "animal"}, + {"Camilla", "chicken"}, + {"Sweetums", "monster"}, + {"Dr. Strangepork", "pig"}, + {"Link Hogthrob", "pig"}, + {"Zoot", "human"}, + {"Dr. Bunsen Honeydew", "human"}, + {"Beaker", "human"}, + {"Swedish Chef", "human"} + }; + +int count = sizeof (muppets) / sizeof (struct critter); + + + +/* This is the comparison function used for sorting and searching. */ + +int +critter_cmp (const void *v1, const void *v2) +{ + const struct critter *c1 = v1; + const struct critter *c2 = v2; + + return strcmp (c1->name, c2->name); +} + + +/* Print information about a critter. */ + +void +print_critter (const struct critter *c) +{ + printf ("%s, the %s\n", c->name, c->species); +} + + +/*@group*/ +/* Do the lookup into the sorted array. */ + +void +find_critter (const char *name) +{ + struct critter target, *result; + target.name = name; + result = bsearch (&target, muppets, count, sizeof (struct critter), + critter_cmp); + if (result) + print_critter (result); + else + printf ("Couldn't find %s.\n", name); +} +/*@end group*/ + +/* Main program. 
*/ + +int +main (void) +{ + int i; + + for (i = 0; i < count; i++) + print_critter (&muppets[i]); + printf ("\n"); + + qsort (muppets, count, sizeof (struct critter), critter_cmp); + + for (i = 0; i < count; i++) + print_critter (&muppets[i]); + printf ("\n"); + + find_critter ("Kermit"); + find_critter ("Gonzo"); + find_critter ("Janice"); + + return 0; +} diff --git a/REORG.TODO/manual/examples/select.c b/REORG.TODO/manual/examples/select.c new file mode 100644 index 0000000000..16c54c0c63 --- /dev/null +++ b/REORG.TODO/manual/examples/select.c @@ -0,0 +1,58 @@ +/* Waiting for Input or Output + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +/*@group*/ +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/time.h> +/*@end group*/ + +/*@group*/ +int +input_timeout (int filedes, unsigned int seconds) +{ + fd_set set; + struct timeval timeout; +/*@end group*/ + + /* Initialize the file descriptor set. */ + FD_ZERO (&set); + FD_SET (filedes, &set); + + /* Initialize the timeout data structure. */ + timeout.tv_sec = seconds; + timeout.tv_usec = 0; + +/*@group*/ + /* @code{select} returns 0 if timeout, 1 if input available, -1 if error. 
*/
+  return TEMP_FAILURE_RETRY (select (FD_SETSIZE,
+                                     &set, NULL, NULL,
+                                     &timeout));
+}
+/*@end group*/
+
+/*@group*/
+int
+main (void)
+{
+  fprintf (stderr, "select returned %d.\n",
+           input_timeout (STDIN_FILENO, 5));
+  return 0;
+}
+/*@end group*/
diff --git a/REORG.TODO/manual/examples/setjmp.c b/REORG.TODO/manual/examples/setjmp.c
new file mode 100644
index 0000000000..20b5064914
--- /dev/null
+++ b/REORG.TODO/manual/examples/setjmp.c
@@ -0,0 +1,52 @@
+/* Introduction to Non-Local Exits
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <setjmp.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+jmp_buf main_loop;
+
+/* Defined below; declared here because main calls it first.  */
+void do_command (void);
+
+void
+abort_to_main_loop (int status)
+{
+  longjmp (main_loop, status);
+}
+
+int
+main (void)
+{
+  while (1)
+    if (setjmp (main_loop))
+      puts ("Back at main loop....");
+    else
+      do_command ();
+}
+
+
+void
+do_command (void)
+{
+  char buffer[128];
+  if (fgets (buffer, sizeof buffer, stdin) == NULL)
+    abort_to_main_loop (-1);
+  else
+    exit (EXIT_SUCCESS);
+}
diff --git a/REORG.TODO/manual/examples/sigh1.c b/REORG.TODO/manual/examples/sigh1.c
new file mode 100644
index 0000000000..6d4a0752f3
--- /dev/null
+++ b/REORG.TODO/manual/examples/sigh1.c
@@ -0,0 +1,53 @@
+/* Signal Handlers that Return
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> + +/* This flag controls termination of the main loop. */ +volatile sig_atomic_t keep_going = 1; + +/* The signal handler just clears the flag and re-enables itself. */ +void +catch_alarm (int sig) +{ + keep_going = 0; + signal (sig, catch_alarm); +} + +void +do_stuff (void) +{ + puts ("Doing stuff while waiting for alarm...."); +} + +int +main (void) +{ + /* Establish a handler for SIGALRM signals. */ + signal (SIGALRM, catch_alarm); + + /* Set an alarm to go off in a little while. */ + alarm (2); + + /* Check the flag once in a while to see when to quit. */ + while (keep_going) + do_stuff (); + + return EXIT_SUCCESS; +} diff --git a/REORG.TODO/manual/examples/sigusr.c b/REORG.TODO/manual/examples/sigusr.c new file mode 100644 index 0000000000..51b44dcf03 --- /dev/null +++ b/REORG.TODO/manual/examples/sigusr.c @@ -0,0 +1,78 @@ +/* Using kill for Communication + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*@group*/
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+/*@end group*/
+
+/* When a @code{SIGUSR1} signal arrives, set this variable. */
+volatile sig_atomic_t usr_interrupt = 0;
+
+void
+synch_signal (int sig)
+{
+  usr_interrupt = 1;
+}
+
+/* The child process executes this function. */
+void
+child_function (void)
+{
+  /* Perform initialization. */
+  printf ("I'm here!!! My pid is %d.\n", (int) getpid ());
+
+  /* Let parent know you're done. */
+  kill (getppid (), SIGUSR1);
+
+  /* Continue with execution. */
+  puts ("Bye, now....");
+  exit (0);
+}
+
+int
+main (void)
+{
+  struct sigaction usr_action;
+  sigset_t block_mask;
+  pid_t child_id;
+
+  /* Establish the signal handler. */
+  sigfillset (&block_mask);
+  usr_action.sa_handler = synch_signal;
+  usr_action.sa_mask = block_mask;
+  usr_action.sa_flags = 0;
+  sigaction (SIGUSR1, &usr_action, NULL);
+
+  /* Create the child process. */
+  child_id = fork ();
+  if (child_id < 0)
+    {
+      /* No child exists to send the signal, so do not wait for it. */
+      perror ("fork");
+      return EXIT_FAILURE;
+    }
+  if (child_id == 0)
+    child_function ();          /* Does not return. */
+
+/*@group*/
+  /* Busy wait for the child to send a signal. */
+  while (!usr_interrupt)
+    ;
+/*@end group*/
+
+  /* Now continue execution. */
+  puts ("That's all, folks!");
+
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/stpcpy.c b/REORG.TODO/manual/examples/stpcpy.c
new file mode 100644
index 0000000000..a0dda1757a
--- /dev/null
+++ b/REORG.TODO/manual/examples/stpcpy.c
@@ -0,0 +1,30 @@
+/* stpcpy example.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <string.h> +#include <stdio.h> + +int +main (void) +{ + char buffer[10]; + char *to = buffer; + to = stpcpy (to, "foo"); + to = stpcpy (to, "bar"); + puts (buffer); + return 0; +} diff --git a/REORG.TODO/manual/examples/strdupa.c b/REORG.TODO/manual/examples/strdupa.c new file mode 100644 index 0000000000..c8e61bac16 --- /dev/null +++ b/REORG.TODO/manual/examples/strdupa.c @@ -0,0 +1,36 @@ +/* strdupa example. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <paths.h> +#include <string.h> +#include <stdio.h> + +const char path[] = _PATH_STDPATH; + +int +main (void) +{ + char *wr_path = strdupa (path); + char *cp = strtok (wr_path, ":"); + + while (cp != NULL) + { + puts (cp); + cp = strtok (NULL, ":"); + } + return 0; +} diff --git a/REORG.TODO/manual/examples/strftim.c b/REORG.TODO/manual/examples/strftim.c new file mode 100644 index 0000000000..08eb22d539 --- /dev/null +++ b/REORG.TODO/manual/examples/strftim.c @@ -0,0 +1,48 @@ +/* Time Functions Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <time.h> +#include <stdio.h> + +#define SIZE 256 + +int +main (void) +{ + char buffer[SIZE]; + time_t curtime; + struct tm *loctime; + + /* Get the current time. */ + curtime = time (NULL); + + /* Convert it to local time representation. */ + loctime = localtime (&curtime); + + /* Print out the date and time in the standard format. */ + fputs (asctime (loctime), stdout); + +/*@group*/ + /* Print it out in a nice format. 
*/ + strftime (buffer, SIZE, "Today is %A, %B %d.\n", loctime); + fputs (buffer, stdout); + strftime (buffer, SIZE, "The time is %I:%M %p.\n", loctime); + fputs (buffer, stdout); + + return 0; +} +/*@end group*/ diff --git a/REORG.TODO/manual/examples/subopt.c b/REORG.TODO/manual/examples/subopt.c new file mode 100644 index 0000000000..278b6dbbc1 --- /dev/null +++ b/REORG.TODO/manual/examples/subopt.c @@ -0,0 +1,95 @@ +/* Parsing of Suboptions Example + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +int do_all; +const char *type; +int read_size; +int write_size; +int read_only; + +enum +{ + RO_OPTION = 0, + RW_OPTION, + READ_SIZE_OPTION, + WRITE_SIZE_OPTION, + THE_END +}; + +const char *mount_opts[] = +{ + [RO_OPTION] = "ro", + [RW_OPTION] = "rw", + [READ_SIZE_OPTION] = "rsize", + [WRITE_SIZE_OPTION] = "wsize", + [THE_END] = NULL +}; + +int +main (int argc, char **argv) +{ + char *subopts, *value; + int opt; + + while ((opt = getopt (argc, argv, "at:o:")) != -1) + switch (opt) + { + case 'a': + do_all = 1; + break; + case 't': + type = optarg; + break; + case 'o': + subopts = optarg; + while (*subopts != '\0') + switch (getsubopt (&subopts, mount_opts, &value)) + { + case RO_OPTION: + read_only = 1; + break; + case RW_OPTION: + read_only = 0; + break; + case READ_SIZE_OPTION: + if (value == NULL) + abort (); + read_size = atoi (value); + break; + case WRITE_SIZE_OPTION: + if (value == NULL) + abort (); + write_size = atoi (value); + break; + default: + /* Unknown suboption. */ + printf ("Unknown suboption `%s'\n", value); + break; + } + break; + default: + abort (); + } + + /* Do the real work. */ + + return 0; +} diff --git a/REORG.TODO/manual/examples/swapcontext.c b/REORG.TODO/manual/examples/swapcontext.c new file mode 100644 index 0000000000..3747230c31 --- /dev/null +++ b/REORG.TODO/manual/examples/swapcontext.c @@ -0,0 +1,116 @@ +/* Complete Context Control + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/time.h>
+
+/* Set by the signal handler.  It is a volatile sig_atomic_t because
+   it is written from the handler and read by the interrupted code. */
+static volatile sig_atomic_t expired;
+
+/* The contexts. */
+static ucontext_t uc[3];
+
+/* We do only a certain number of switches. */
+static int switches;
+
+
+/* This is the function doing the work. It is just a
+   skeleton, real code has to be filled in. */
+static void
+f (int n)
+{
+  int m = 0;
+  while (1)
+    {
+      /* This is where the work would be done. */
+      if (++m % 100 == 0)
+        {
+          putchar ('.');
+          fflush (stdout);
+        }
+
+      /* Regularly the @var{expired} variable must be checked. */
+      if (expired)
+        {
+          /* We do not want the program to run forever. */
+          if (++switches == 20)
+            return;
+
+          printf ("\nswitching from %d to %d\n", n, 3 - n);
+          expired = 0;
+          /* Switch to the other context, saving the current one. */
+          swapcontext (&uc[n], &uc[3 - n]);
+        }
+    }
+}
+
+/* This is the signal handler which simply sets the variable. */
+void
+handler (int signal)
+{
+  expired = 1;
+}
+
+
+int
+main (void)
+{
+  struct sigaction sa;
+  struct itimerval it;
+  char st1[8192];
+  char st2[8192];
+
+  /* Initialize the data structures for the interval timer. */
+  sa.sa_flags = SA_RESTART;
+  sigfillset (&sa.sa_mask);
+  sa.sa_handler = handler;
+  it.it_interval.tv_sec = 0;
+  it.it_interval.tv_usec = 1;
+  it.it_value = it.it_interval;
+
+  /* Install the timer and get the context we can manipulate. */
+  if (sigaction (SIGPROF, &sa, NULL) < 0
+      || setitimer (ITIMER_PROF, &it, NULL) < 0
+      || getcontext (&uc[1]) == -1
+      || getcontext (&uc[2]) == -1)
+    abort ();
+
+  /* Create a context with a separate stack which causes the
+     function @code{f} to be called with the parameter @code{1}.
+     Note that the @code{uc_link} points to the main context
+     which will cause the program to terminate once the function
+     returns. */
+  uc[1].uc_link = &uc[0];
+  uc[1].uc_stack.ss_sp = st1;
+  uc[1].uc_stack.ss_size = sizeof st1;
+  makecontext (&uc[1], (void (*) (void)) f, 1, 1);
+
+  /* Similarly, but @code{2} is passed as the parameter to @code{f}. */
+  uc[2].uc_link = &uc[0];
+  uc[2].uc_stack.ss_sp = st2;
+  uc[2].uc_stack.ss_size = sizeof st2;
+  makecontext (&uc[2], (void (*) (void)) f, 1, 2);
+
+  /* Start running. */
+  swapcontext (&uc[0], &uc[1]);
+  putchar ('\n');
+
+  return 0;
+}
diff --git a/REORG.TODO/manual/examples/termios.c b/REORG.TODO/manual/examples/termios.c
new file mode 100644
index 0000000000..8ac2f626d5
--- /dev/null
+++ b/REORG.TODO/manual/examples/termios.c
@@ -0,0 +1,77 @@
+/* Noncanonical Mode Example
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <termios.h>
+
+/* Use this variable to remember original terminal attributes. */
+
+struct termios saved_attributes;
+
+void
+reset_input_mode (void)
+{
+  tcsetattr (STDIN_FILENO, TCSANOW, &saved_attributes);
+}
+
+void
+set_input_mode (void)
+{
+  struct termios tattr;
+
+  /* Make sure stdin is a terminal. */
+  if (!isatty (STDIN_FILENO))
+    {
+      fprintf (stderr, "Not a terminal.\n");
+      exit (EXIT_FAILURE);
+    }
+
+  /* Save the terminal attributes so we can restore them later. */
+  tcgetattr (STDIN_FILENO, &saved_attributes);
+  atexit (reset_input_mode);
+
+/*@group*/
+  /* Set the funny terminal modes. */
+  tcgetattr (STDIN_FILENO, &tattr);
+  tattr.c_lflag &= ~(ICANON|ECHO); /* Clear ICANON and ECHO. */
+  tattr.c_cc[VMIN] = 1;
+  tattr.c_cc[VTIME] = 0;
+  tcsetattr (STDIN_FILENO, TCSAFLUSH, &tattr);
+}
+/*@end group*/
+
+int
+main (void)
+{
+  char c;
+
+  set_input_mode ();
+
+  while (1)
+    {
+      /* Stop on end of file or a read error as well as on @kbd{C-d};
+         otherwise @code{c} would be examined without being set. */
+      if (read (STDIN_FILENO, &c, 1) != 1 || c == '\004')
+        break;
+      else
+        putchar (c);
+    }
+
+  return EXIT_SUCCESS;
+}
diff --git a/REORG.TODO/manual/examples/testopt.c b/REORG.TODO/manual/examples/testopt.c
new file mode 100644
index 0000000000..1cb53352eb
--- /dev/null
+++ b/REORG.TODO/manual/examples/testopt.c
@@ -0,0 +1,72 @@
+/* Example of Parsing Arguments with getopt.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/ + +/*@group*/ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +int +main (int argc, char **argv) +{ + int aflag = 0; + int bflag = 0; + char *cvalue = NULL; + int index; + int c; + + opterr = 0; +/*@end group*/ + +/*@group*/ + while ((c = getopt (argc, argv, "abc:")) != -1) + switch (c) + { + case 'a': + aflag = 1; + break; + case 'b': + bflag = 1; + break; + case 'c': + cvalue = optarg; + break; + case '?': + if (optopt == 'c') + fprintf (stderr, "Option -%c requires an argument.\n", optopt); + else if (isprint (optopt)) + fprintf (stderr, "Unknown option `-%c'.\n", optopt); + else + fprintf (stderr, + "Unknown option character `\\x%x'.\n", + optopt); + return 1; + default: + abort (); + } +/*@end group*/ + +/*@group*/ + printf ("aflag = %d, bflag = %d, cvalue = %s\n", + aflag, bflag, cvalue); + + for (index = optind; index < argc; index++) + printf ("Non-option argument %s\n", argv[index]); + return 0; +} +/*@end group*/ diff --git a/REORG.TODO/manual/examples/testpass.c b/REORG.TODO/manual/examples/testpass.c new file mode 100644 index 0000000000..12c44f3028 --- /dev/null +++ b/REORG.TODO/manual/examples/testpass.c @@ -0,0 +1,43 @@ +/* Verify a password. + Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, if not, see <http://www.gnu.org/licenses/>. 
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <crypt.h>
+
+int
+main(void)
+{
+  /* Hashed form of "GNU libc manual". */
+  const char *const pass = "$1$/iSaq7rB$EoUw5jJPPvAPECNaaWzMK/";
+
+  char *result;
+  int ok;
+
+/*@group*/
+  /* Read in the user's password and encrypt it,
+     passing the expected password in as the salt. */
+  result = crypt(getpass("Password:"), pass);
+/*@end group*/
+
+  /* Test the result; @code{crypt} returns a null pointer on failure. */
+  ok = result != NULL && strcmp (result, pass) == 0;
+
+  puts(ok ? "Access granted." : "Access denied.");
+  return ok ? 0 : 1;
+}
diff --git a/REORG.TODO/manual/examples/timeval_subtract.c b/REORG.TODO/manual/examples/timeval_subtract.c
new file mode 100644
index 0000000000..554dc77f40
--- /dev/null
+++ b/REORG.TODO/manual/examples/timeval_subtract.c
@@ -0,0 +1,46 @@
+/* struct timeval subtraction.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <sys/time.h>
+
+/* Subtract the `struct timeval' values X and Y,
+   storing the result in RESULT.
+   Return 1 if the difference is negative, otherwise 0. */
+
+int
+timeval_subtract (struct timeval *result, struct timeval *x, struct timeval *y)
+{
+  /* Perform the carry for the later subtraction by updating @var{y}.
*/
+  if (x->tv_usec < y->tv_usec) {
+    int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
+    y->tv_usec -= 1000000 * nsec;
+    y->tv_sec += nsec;
+  }
+  if (x->tv_usec - y->tv_usec >= 1000000) {
+    int nsec = (x->tv_usec - y->tv_usec) / 1000000;
+    y->tv_usec += 1000000 * nsec;
+    y->tv_sec -= nsec;
+  }
+
+  /* Compute the time remaining to wait.
+     @code{tv_usec} is now between 0 and 999999. */
+  result->tv_sec = x->tv_sec - y->tv_sec;
+  result->tv_usec = x->tv_usec - y->tv_usec;
+
+  /* Return 1 if result is negative. */
+  return x->tv_sec < y->tv_sec;
+}
diff --git a/REORG.TODO/manual/fdl-1.3.texi b/REORG.TODO/manual/fdl-1.3.texi
new file mode 100644
index 0000000000..cb71f05a17
--- /dev/null
+++ b/REORG.TODO/manual/fdl-1.3.texi
@@ -0,0 +1,505 @@
+@c The GNU Free Documentation License.
+@center Version 1.3, 3 November 2008
+
+@c This file is intended to be included within another document,
+@c hence no sectioning command or @node.
+
+@display
+Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+@uref{http://fsf.org/}
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@enumerate 0
+@item
+PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document @dfn{free} in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of ``copyleft'', which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+ +We have designed this License in order to use it for manuals for free +software, because free software needs free documentation: a free +program should come with manuals providing the same freedoms that the +software does. But this License is not limited to software manuals; +it can be used for any textual work, regardless of subject matter or +whether it is published as a printed book. We recommend this License +principally for works whose purpose is instruction or reference. + +@item +APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that +contains a notice placed by the copyright holder saying it can be +distributed under the terms of this License. Such a notice grants a +world-wide, royalty-free license, unlimited in duration, to use that +work under the conditions stated herein. The ``Document'', below, +refers to any such manual or work. Any member of the public is a +licensee, and is addressed as ``you''. You accept the license if you +copy, modify or distribute the work in a way requiring permission +under copyright law. + +A ``Modified Version'' of the Document means any work containing the +Document or a portion of it, either copied verbatim, or with +modifications and/or translated into another language. + +A ``Secondary Section'' is a named appendix or a front-matter section +of the Document that deals exclusively with the relationship of the +publishers or authors of the Document to the Document's overall +subject (or to related matters) and contains nothing that could fall +directly within that overall subject. (Thus, if the Document is in +part a textbook of mathematics, a Secondary Section may not explain +any mathematics.) The relationship could be a matter of historical +connection with the subject or with related matters, or of legal, +commercial, philosophical, ethical or political position regarding +them. 
+ +The ``Invariant Sections'' are certain Secondary Sections whose titles +are designated, as being those of Invariant Sections, in the notice +that says that the Document is released under this License. If a +section does not fit the above definition of Secondary then it is not +allowed to be designated as Invariant. The Document may contain zero +Invariant Sections. If the Document does not identify any Invariant +Sections then there are none. + +The ``Cover Texts'' are certain short passages of text that are listed, +as Front-Cover Texts or Back-Cover Texts, in the notice that says that +the Document is released under this License. A Front-Cover Text may +be at most 5 words, and a Back-Cover Text may be at most 25 words. + +A ``Transparent'' copy of the Document means a machine-readable copy, +represented in a format whose specification is available to the +general public, that is suitable for revising the document +straightforwardly with generic text editors or (for images composed of +pixels) generic paint programs or (for drawings) some widely available +drawing editor, and that is suitable for input to text formatters or +for automatic translation to a variety of formats suitable for input +to text formatters. A copy made in an otherwise Transparent file +format whose markup, or absence of markup, has been arranged to thwart +or discourage subsequent modification by readers is not Transparent. +An image format is not Transparent if used for any substantial amount +of text. A copy that is not ``Transparent'' is called ``Opaque''. + +Examples of suitable formats for Transparent copies include plain +ASCII without markup, Texinfo input format, La@TeX{} input +format, SGML or XML using a publicly available +DTD, and standard-conforming simple HTML, +PostScript or PDF designed for human modification. Examples +of transparent image formats include PNG, XCF and +JPG. 
Opaque formats include proprietary formats that can be +read and edited only by proprietary word processors, SGML or +XML for which the DTD and/or processing tools are +not generally available, and the machine-generated HTML, +PostScript or PDF produced by some word processors for +output purposes only. + +The ``Title Page'' means, for a printed book, the title page itself, +plus such following pages as are needed to hold, legibly, the material +this License requires to appear in the title page. For works in +formats which do not have any title page as such, ``Title Page'' means +the text near the most prominent appearance of the work's title, +preceding the beginning of the body of the text. + +The ``publisher'' means any person or entity that distributes copies +of the Document to the public. + +A section ``Entitled XYZ'' means a named subunit of the Document whose +title either is precisely XYZ or contains XYZ in parentheses following +text that translates XYZ in another language. (Here XYZ stands for a +specific section name mentioned below, such as ``Acknowledgements'', +``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title'' +of such a section when you modify the Document means that it remains a +section ``Entitled XYZ'' according to this definition. + +The Document may include Warranty Disclaimers next to the notice which +states that this License applies to the Document. These Warranty +Disclaimers are considered to be included by reference in this +License, but only as regards disclaiming warranties: any other +implication that these Warranty Disclaimers may have is void and has +no effect on the meaning of this License. 
+ +@item +VERBATIM COPYING + +You may copy and distribute the Document in any medium, either +commercially or noncommercially, provided that this License, the +copyright notices, and the license notice saying this License applies +to the Document are reproduced in all copies, and that you add no other +conditions whatsoever to those of this License. You may not use +technical measures to obstruct or control the reading or further +copying of the copies you make or distribute. However, you may accept +compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and +you may publicly display copies. + +@item +COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have +printed covers) of the Document, numbering more than 100, and the +Document's license notice requires Cover Texts, you must enclose the +copies in covers that carry, clearly and legibly, all these Cover +Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on +the back cover. Both covers must also clearly and legibly identify +you as the publisher of these copies. The front cover must present +the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. +Copying with changes limited to the covers, as long as they preserve +the title of the Document and satisfy these conditions, can be treated +as verbatim copying in other respects. + +If the required texts for either cover are too voluminous to fit +legibly, you should put the first ones listed (as many as fit +reasonably) on the actual cover, and continue the rest onto adjacent +pages. 
+ +If you publish or distribute Opaque copies of the Document numbering +more than 100, you must either include a machine-readable Transparent +copy along with each Opaque copy, or state in or with each Opaque copy +a computer-network location from which the general network-using +public has access to download using public-standard network protocols +a complete Transparent copy of the Document, free of added material. +If you use the latter option, you must take reasonably prudent steps, +when you begin distribution of Opaque copies in quantity, to ensure +that this Transparent copy will remain thus accessible at the stated +location until at least one year after the last time you distribute an +Opaque copy (directly or through your agents or retailers) of that +edition to the public. + +It is requested, but not required, that you contact the authors of the +Document well before redistributing any large number of copies, to give +them a chance to provide you with an updated version of the Document. + +@item +MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under +the conditions of sections 2 and 3 above, provided that you release +the Modified Version under precisely this License, with the Modified +Version filling the role of the Document, thus licensing distribution +and modification of the Modified Version to whoever possesses a copy +of it. In addition, you must do these things in the Modified Version: + +@enumerate A +@item +Use in the Title Page (and on the covers, if any) a title distinct +from that of the Document, and from those of previous versions +(which should, if there were any, be listed in the History section +of the Document). You may use the same title as a previous version +if the original publisher of that version gives permission. 
+ +@item +List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified +Version, together with at least five of the principal authors of the +Document (all of its principal authors, if it has fewer than five), +unless they release you from this requirement. + +@item +State on the Title page the name of the publisher of the +Modified Version, as the publisher. + +@item +Preserve all the copyright notices of the Document. + +@item +Add an appropriate copyright notice for your modifications +adjacent to the other copyright notices. + +@item +Include, immediately after the copyright notices, a license notice +giving the public permission to use the Modified Version under the +terms of this License, in the form shown in the Addendum below. + +@item +Preserve in that license notice the full lists of Invariant Sections +and required Cover Texts given in the Document's license notice. + +@item +Include an unaltered copy of this License. + +@item +Preserve the section Entitled ``History'', Preserve its Title, and add +to it an item stating at least the title, year, new authors, and +publisher of the Modified Version as given on the Title Page. If +there is no section Entitled ``History'' in the Document, create one +stating the title, year, authors, and publisher of the Document as +given on its Title Page, then add an item describing the Modified +Version as stated in the previous sentence. + +@item +Preserve the network location, if any, given in the Document for +public access to a Transparent copy of the Document, and likewise +the network locations given in the Document for previous versions +it was based on. These may be placed in the ``History'' section. +You may omit a network location for a work that was published at +least four years before the Document itself, or if the original +publisher of the version it refers to gives permission. 
+ +@item +For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve +the Title of the section, and preserve in the section all the +substance and tone of each of the contributor acknowledgements and/or +dedications given therein. + +@item +Preserve all the Invariant Sections of the Document, +unaltered in their text and in their titles. Section numbers +or the equivalent are not considered part of the section titles. + +@item +Delete any section Entitled ``Endorsements''. Such a section +may not be included in the Modified Version. + +@item +Do not retitle any existing section to be Entitled ``Endorsements'' or +to conflict in title with any Invariant Section. + +@item +Preserve any Warranty Disclaimers. +@end enumerate + +If the Modified Version includes new front-matter sections or +appendices that qualify as Secondary Sections and contain no material +copied from the Document, you may at your option designate some or all +of these sections as invariant. To do this, add their titles to the +list of Invariant Sections in the Modified Version's license notice. +These titles must be distinct from any other section titles. + +You may add a section Entitled ``Endorsements'', provided it contains +nothing but endorsements of your Modified Version by various +parties---for example, statements of peer review or that the text has +been approved by an organization as the authoritative definition of a +standard. + +You may add a passage of up to five words as a Front-Cover Text, and a +passage of up to 25 words as a Back-Cover Text, to the end of the list +of Cover Texts in the Modified Version. Only one passage of +Front-Cover Text and one of Back-Cover Text may be added by (or +through arrangements made by) any one entity. 
If the Document already +includes a cover text for the same cover, previously added by you or +by arrangement made by the same entity you are acting on behalf of, +you may not add another; but you may replace the old one, on explicit +permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License +give permission to use their names for publicity for or to assert or +imply endorsement of any Modified Version. + +@item +COMBINING DOCUMENTS + +You may combine the Document with other documents released under this +License, under the terms defined in section 4 above for modified +versions, provided that you include in the combination all of the +Invariant Sections of all of the original documents, unmodified, and +list them all as Invariant Sections of your combined work in its +license notice, and that you preserve all their Warranty Disclaimers. + +The combined work need only contain one copy of this License, and +multiple identical Invariant Sections may be replaced with a single +copy. If there are multiple Invariant Sections with the same name but +different contents, make the title of each such section unique by +adding at the end of it, in parentheses, the name of the original +author or publisher of that section if known, or else a unique number. +Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled ``History'' +in the various original documents, forming one section Entitled +``History''; likewise combine any sections Entitled ``Acknowledgements'', +and any sections Entitled ``Dedications''. 
You must delete all +sections Entitled ``Endorsements.'' + +@item +COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this +License in the various documents with a single copy that is included in +the collection, provided that you follow the rules of this License for +verbatim copying of each of the documents in all other respects. + +You may extract a single document from such a collection, and distribute +it individually under this License, provided you insert a copy of this +License into the extracted document, and follow this License in all +other respects regarding verbatim copying of that document. + +@item +AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate +and independent documents or works, in or on a volume of a storage or +distribution medium, is called an ``aggregate'' if the copyright +resulting from the compilation is not used to limit the legal rights +of the compilation's users beyond what the individual works permit. +When the Document is included in an aggregate, this License does not +apply to the other works in the aggregate which are not themselves +derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these +copies of the Document, then if the Document is less than one half of +the entire aggregate, the Document's Cover Texts may be placed on +covers that bracket the Document within the aggregate, or the +electronic equivalent of covers if the Document is in electronic form. +Otherwise they must appear on printed covers that bracket the whole +aggregate. + +@item +TRANSLATION + +Translation is considered a kind of modification, so you may +distribute translations of the Document under the terms of section 4. 
+Replacing Invariant Sections with translations requires special +permission from their copyright holders, but you may include +translations of some or all Invariant Sections in addition to the +original versions of these Invariant Sections. You may include a +translation of this License, and all the license notices in the +Document, and any Warranty Disclaimers, provided that you also include +the original English version of this License and the original versions +of those notices and disclaimers. In case of a disagreement between +the translation and the original version of this License or a notice +or disclaimer, the original version will prevail. + +If a section in the Document is Entitled ``Acknowledgements'', +``Dedications'', or ``History'', the requirement (section 4) to Preserve +its Title (section 1) will typically require changing the actual +title. + +@item +TERMINATION + +You may not copy, modify, sublicense, or distribute the Document +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense, or distribute it is void, and +will automatically terminate your rights under this License. + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. + +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, receipt of a copy of some or all of the same material does +not give you any rights to use it. + +@item +FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions +of the GNU Free Documentation License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. See +@uref{http://www.gnu.org/copyleft/}. + +Each version of the License is given a distinguishing version number. +If the Document specifies that a particular numbered version of this +License ``or any later version'' applies to it, you have the option of +following the terms and conditions either of that specified version or +of any later version that has been published (not as a draft) by the +Free Software Foundation. If the Document does not specify a version +number of this License, you may choose any version ever published (not +as a draft) by the Free Software Foundation. If the Document +specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the +Document. + +@item +RELICENSING + +``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any +World Wide Web server that publishes copyrightable works and also +provides prominent facilities for anybody to edit those works. A +public wiki that anybody can edit is an example of such a server. A +``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the +site means any set of copyrightable works thus published on the MMC +site. 
+ +``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0 +license published by Creative Commons Corporation, a not-for-profit +corporation with a principal place of business in San Francisco, +California, as well as future copyleft versions of that license +published by that same organization. + +``Incorporate'' means to publish or republish a Document, in whole or +in part, as part of another Document. + +An MMC is ``eligible for relicensing'' if it is licensed under this +License, and if all works that were first published under this License +somewhere other than this MMC, and subsequently incorporated in whole +or in part into the MMC, (1) had no cover texts or invariant sections, +and (2) were thus incorporated prior to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site +under CC-BY-SA on the same site at any time before August 1, 2009, +provided the MMC is eligible for relicensing. + +@end enumerate + +@page +@heading ADDENDUM: How to use this License for your documents + +To use this License in a document you have written, include a copy of +the License in the document and put the following copyright and +license notices just after the title page: + +@smallexample +@group + Copyright (C) @var{year} @var{your name}. + Permission is granted to copy, distribute and/or modify this document + under the terms of the GNU Free Documentation License, Version 1.3 + or any later version published by the Free Software Foundation; + with no Invariant Sections, no Front-Cover Texts, and no Back-Cover + Texts. A copy of the license is included in the section entitled ``GNU + Free Documentation License''. 
+@end group +@end smallexample + +If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, +replace the ``with@dots{}Texts.''@: line with this: + +@smallexample +@group + with the Invariant Sections being @var{list their titles}, with + the Front-Cover Texts being @var{list}, and with the Back-Cover Texts + being @var{list}. +@end group +@end smallexample + +If you have Invariant Sections without Cover Texts, or some other +combination of the three, merge those two alternatives to suit the +situation. + +If your document contains nontrivial examples of program code, we +recommend releasing these examples in parallel under your choice of +free software license, such as the GNU General Public License, +to permit their use in free software. + +@c Local Variables: +@c ispell-local-pdict: "ispell-dict" +@c End: diff --git a/REORG.TODO/manual/filesys.texi b/REORG.TODO/manual/filesys.texi new file mode 100644 index 0000000000..e3fe323f47 --- /dev/null +++ b/REORG.TODO/manual/filesys.texi @@ -0,0 +1,3657 @@ +@node File System Interface, Pipes and FIFOs, Low-Level I/O, Top +@c %MENU% Functions for manipulating files +@chapter File System Interface + +This chapter describes @theglibc{}'s functions for manipulating +files. Unlike the input and output functions (@pxref{I/O on Streams}; +@pxref{Low-Level I/O}), these functions are concerned with operating +on the files themselves rather than on their contents. + +Among the facilities described in this chapter are functions for +examining or modifying directories, functions for renaming and deleting +files, and functions for examining and setting file attributes such as +access permissions and modification times. + +@menu +* Working Directory:: This is used to resolve relative + file names. +* Accessing Directories:: Finding out what files a directory + contains. +* Working with Directory Trees:: Apply actions to all files or a selectable + subset of a directory hierarchy. 
+* Hard Links:: Adding alternate names to a file. +* Symbolic Links:: A file that ``points to'' a file name. +* Deleting Files:: How to delete a file, and what that means. +* Renaming Files:: Changing a file's name. +* Creating Directories:: A system call just for creating a directory. +* File Attributes:: Attributes of individual files. +* Making Special Files:: How to create special files. +* Temporary Files:: Naming and creating temporary files. +@end menu + +@node Working Directory +@section Working Directory + +@cindex current working directory +@cindex working directory +@cindex change working directory +Each process has associated with it a directory, called its @dfn{current +working directory} or simply @dfn{working directory}, that is used in +the resolution of relative file names (@pxref{File Name Resolution}). + +When you log in and begin a new session, your working directory is +initially set to the home directory associated with your login account +in the system user database. You can find any user's home directory +using the @code{getpwuid} or @code{getpwnam} functions; see @ref{User +Database}. + +Users can change the working directory using shell commands like +@code{cd}. The functions described in this section are the primitives +used by those commands and by other programs for examining and changing +the working directory. +@pindex cd + +Prototypes for these functions are declared in the header file +@file{unistd.h}. +@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun {char *} getcwd (char *@var{buffer}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c If buffer is NULL, this function calls malloc and realloc, and, in +@c case of error, free. Linux offers a getcwd syscall that we use on +@c GNU/Linux systems, but it may fail if the pathname is too long. 
As a +@c fallback, and on other systems, the generic implementation opens each +@c parent directory with opendir, which allocates memory for the +@c directory stream with malloc. If a fstatat64 syscall is not +@c available, very deep directory trees may also have to malloc to build +@c longer sequences of ../../../... than those supported by a global +@c const read-only string. + +@c linux/__getcwd +@c posix/__getcwd +@c malloc/realloc/free if buffer is NULL, or if dir is too deep +@c lstat64 -> see its own entry +@c fstatat64 +@c direct syscall if possible, alloca+snprintf+*stat64 otherwise +@c openat64_not_cancel_3, close_not_cancel_no_status +@c __fdopendir, __opendir, __readdir, rewinddir +The @code{getcwd} function returns an absolute file name representing +the current working directory, storing it in the character array +@var{buffer} that you provide. The @var{size} argument is how you tell +the system the allocation size of @var{buffer}. + +The @glibcadj{} version of this function also permits you to specify a +null pointer for the @var{buffer} argument. Then @code{getcwd} +allocates a buffer automatically, as with @code{malloc} +(@pxref{Unconstrained Allocation}). If the @var{size} is greater than +zero, then the buffer is that large; otherwise, the buffer is as large +as necessary to hold the result. + +The return value is @var{buffer} on success and a null pointer on failure. +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EINVAL +The @var{size} argument is zero and @var{buffer} is not a null pointer. + +@item ERANGE +The @var{size} argument is less than the length of the working directory +name. You need to allocate a bigger array and try again. + +@item EACCES +Permission to read or search a component of the file name was denied. 
+@end table +@end deftypefun + +You could implement the behavior of GNU's @w{@code{getcwd (NULL, 0)}} +using only the standard behavior of @code{getcwd}: + +@smallexample +char * +gnu_getcwd () +@{ + size_t size = 100; + + while (1) + @{ + char *buffer = (char *) xmalloc (size); + if (getcwd (buffer, size) == buffer) + return buffer; + free (buffer); + if (errno != ERANGE) + return 0; + size *= 2; + @} +@} +@end smallexample + +@noindent +@xref{Malloc Examples}, for information about @code{xmalloc}, which is +not a library function but is a customary name used in most GNU +software. + +@comment unistd.h +@comment BSD +@deftypefn {Deprecated Function} {char *} getwd (char *@var{buffer}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @ascuintl{}}@acunsafe{@acsmem{} @acsfd{}}} +@c Besides the getcwd safety issues, it calls strerror_r on error, which +@c brings in all of the i18n issues. +This is similar to @code{getcwd}, but has no way to specify the size of +the buffer. @Theglibc{} provides @code{getwd} only +for backwards compatibility with BSD. + +The @var{buffer} argument should be a pointer to an array at least +@code{PATH_MAX} bytes long (@pxref{Limits for Files}). On @gnuhurdsystems{} +there is no limit to the size of a file name, so this is not +necessarily enough space to contain the directory name. That is why +this function is deprecated. +@end deftypefn + +@comment unistd.h +@comment GNU +@deftypefun {char *} get_current_dir_name (void) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c Besides getcwd, which this function calls as a fallback, it calls +@c getenv, with the potential thread-safety issues that brings about. +@vindex PWD +This @code{get_current_dir_name} function is basically equivalent to +@w{@code{getcwd (NULL, 0)}}. The only difference is that the value of +the @code{PWD} variable is returned if this value is correct. 
This is a +subtle difference which is visible if the path described by the +@code{PWD} value is using one or more symbolic links in which case the +value returned by @code{getcwd} can resolve the symbolic links and +therefore yield a different result. + +This function is a GNU extension. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int chdir (const char *@var{filename}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is used to set the process's working directory to +@var{filename}. + +The normal, successful return value from @code{chdir} is @code{0}. A +value of @code{-1} is returned to indicate an error. The @code{errno} +error conditions defined for this function are the usual file name +syntax errors (@pxref{File Name Errors}), plus @code{ENOTDIR} if the +file @var{filename} is not a directory. +@end deftypefun + +@comment unistd.h +@comment XPG +@deftypefun int fchdir (int @var{filedes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is used to set the process's working directory to the +directory associated with the file descriptor @var{filedes}. + +The normal, successful return value from @code{fchdir} is @code{0}. A +value of @code{-1} is returned to indicate an error. The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EACCES +Read permission is denied for the directory associated with @var{filedes}. + +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item ENOTDIR +The file descriptor @var{filedes} is not associated with a directory. + +@item EINTR +The function call was interrupted by a signal. + +@item EIO +An I/O error occurred. +@end table +@end deftypefun + + +@node Accessing Directories +@section Accessing Directories +@cindex accessing directories +@cindex reading from a directory +@cindex directories, accessing + +The facilities described in this section let you read the contents of a +directory file.
This is useful if you want your program to list all the +files in a directory, perhaps as part of a menu. + +@cindex directory stream +The @code{opendir} function opens a @dfn{directory stream} whose +elements are directory entries. Alternatively, @code{fdopendir} can be +used, which can have advantages if the program needs to have more +control over the way the directory is opened for reading. This +allows you, for instance, to pass the @code{O_NOATIME} flag to +@code{open}. + +You use the @code{readdir} function on the directory stream to +retrieve these entries, represented as @w{@code{struct dirent}} +objects. The name of the file for each entry is stored in the +@code{d_name} member of this structure. There are obvious parallels +here to the stream facilities for ordinary files, described in +@ref{I/O on Streams}. + +@menu +* Directory Entries:: Format of one directory entry. +* Opening a Directory:: How to open a directory stream. +* Reading/Closing Directory:: How to read directory entries from the stream. +* Simple Directory Lister:: A very simple directory listing program. +* Random Access Directory:: Rereading part of the directory + already read with the same stream. +* Scanning Directory Content:: Get entries for user selected subset of + contents in given directory. +* Simple Directory Lister Mark II:: Revised version of the program. +@end menu + +@node Directory Entries +@subsection Format of a Directory Entry + +@pindex dirent.h +This section describes what you find in a single directory entry, as you +might obtain it from a directory stream. All the symbols are declared +in the header file @file{dirent.h}. + +@comment dirent.h +@comment POSIX.1 +@deftp {Data Type} {struct dirent} +This is a structure type used to return information about directory +entries. It contains the following fields: + +@table @code +@item char d_name[] +This is the null-terminated file name component. This is the only +field you can count on in all POSIX systems.
+ +@item ino_t d_fileno +This is the file serial number. For BSD compatibility, you can also +refer to this member as @code{d_ino}. On @gnulinuxhurdsystems{} and most POSIX +systems, for most files this is the same as the @code{st_ino} member that +@code{stat} will return for the file. @xref{File Attributes}. + +@item unsigned char d_namlen +This is the length of the file name, not including the terminating +null character. Its type is @code{unsigned char} because that is the +integer type of the appropriate size. This member is a BSD extension. +The symbol @code{_DIRENT_HAVE_D_NAMLEN} is defined if this member is +available. + +@item unsigned char d_type +This is the type of the file, possibly unknown. The following constants +are defined for its value: + +@vtable @code +@item DT_UNKNOWN +The type is unknown. Only some filesystems have full support to +return the type of the file; others might always return this value. + +@item DT_REG +A regular file. + +@item DT_DIR +A directory. + +@item DT_FIFO +A named pipe, or FIFO. @xref{FIFO Special Files}. + +@item DT_SOCK +A local-domain socket. @c !!! @xref{Local Domain}. + +@item DT_CHR +A character device. + +@item DT_BLK +A block device. + +@item DT_LNK +A symbolic link. +@end vtable + +This member is a BSD extension. The symbol @code{_DIRENT_HAVE_D_TYPE} +is defined if this member is available. On systems where it is used, it +corresponds to the file type bits in the @code{st_mode} member of +@code{struct stat}. If the value cannot be determined the member +value is @code{DT_UNKNOWN}. These two macros convert between @code{d_type} +values and @code{st_mode} values: + +@comment dirent.h +@comment BSD +@deftypefun int IFTODT (mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This returns the @code{d_type} value corresponding to @var{mode}.
+@end deftypefun + +@comment dirent.h +@comment BSD +@deftypefun mode_t DTTOIF (int @var{dtype}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This returns the @code{st_mode} value corresponding to @var{dtype}. +@end deftypefun +@end table + +This structure may contain additional members in the future. Their +availability is always announced in the compilation environment by a +macro named @code{_DIRENT_HAVE_D_@var{xxx}} where @var{xxx} is replaced +by the name of the new member. For instance, the member @code{d_reclen} +available on some systems is announced through the macro +@code{_DIRENT_HAVE_D_RECLEN}. + +When a file has multiple names, each name has its own directory entry. +The only way you can tell that the directory entries belong to a +single file is that they have the same value for the @code{d_fileno} +field. + +File attributes such as size, modification times etc., are part of the +file itself, not of any particular directory entry. @xref{File +Attributes}. +@end deftp + +@node Opening a Directory +@subsection Opening a Directory Stream + +@pindex dirent.h +This section describes how to open a directory stream. All the symbols +are declared in the header file @file{dirent.h}. + +@comment dirent.h +@comment POSIX.1 +@deftp {Data Type} DIR +The @code{DIR} data type represents a directory stream. +@end deftp + +You shouldn't ever allocate objects of the @code{struct dirent} or +@code{DIR} data types, since the directory access functions do that for +you. Instead, you refer to these objects using the pointers returned by +the following functions. + +@comment dirent.h +@comment POSIX.1 +@deftypefun {DIR *} opendir (const char *@var{dirname}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c Besides the safe syscall, we have to allocate the DIR object with +@c __alloc_dir, that calls malloc. +The @code{opendir} function opens and returns a directory stream for +reading the directory whose file name is @var{dirname}. 
The stream has +type @code{DIR *}. + +If unsuccessful, @code{opendir} returns a null pointer. In addition to +the usual file name errors (@pxref{File Name Errors}), the +following @code{errno} error conditions are defined for this function: + +@table @code +@item EACCES +Read permission is denied for the directory named by @code{dirname}. + +@item EMFILE +The process has too many files open. + +@item ENFILE +The entire system, or perhaps the file system which contains the +directory, cannot support any additional open files at the moment. +(This problem cannot happen on @gnuhurdsystems{}.) + +@item ENOMEM +Not enough memory available. +@end table + +The @code{DIR} type is typically implemented using a file descriptor, +and the @code{opendir} function in terms of the @code{open} function. +@xref{Low-Level I/O}. Directory streams and the underlying +file descriptors are closed on @code{exec} (@pxref{Executing a File}). +@end deftypefun + +The directory which is opened for reading by @code{opendir} is +identified by the name. In some situations this is not sufficient. +Or the way @code{opendir} implicitly creates a file descriptor for the +directory is not the way a program might want it. In these cases an +alternative interface can be used. + +@comment dirent.h +@comment GNU +@deftypefun {DIR *} fdopendir (int @var{fd}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c The DIR object is allocated with __alloc_dir, that calls malloc. +The @code{fdopendir} function works just like @code{opendir} but +instead of taking a file name and opening a file descriptor for the +directory the caller is required to provide a file descriptor. This +file descriptor is then used in subsequent uses of the returned +directory stream object. + +The caller must make sure the file descriptor is associated with a +directory and it allows reading. 
+ +If the @code{fdopendir} call returns successfully the file descriptor +is now under the control of the system. It can be used in the same +way the descriptor implicitly created by @code{opendir} can be used +but the program must not close the descriptor. + +In case the function is unsuccessful it returns a null pointer and the +file descriptor remains to be usable by the program. The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The file descriptor is not valid. + +@item ENOTDIR +The file descriptor is not associated with a directory. + +@item EINVAL +The descriptor does not allow reading the directory content. + +@item ENOMEM +Not enough memory available. +@end table +@end deftypefun + +In some situations it can be desirable to get hold of the file +descriptor which is created by the @code{opendir} call. For instance, +to switch the current working directory to the directory just read the +@code{fchdir} function could be used. Historically the @code{DIR} type +was exposed and programs could access the fields. This does not happen +in @theglibc{}. Instead a separate function is provided to allow +access. + +@comment dirent.h +@comment GNU +@deftypefun int dirfd (DIR *@var{dirstream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{dirfd} returns the file descriptor associated with +the directory stream @var{dirstream}. This descriptor can be used until +the directory is closed with @code{closedir}. If the directory stream +implementation is not using file descriptors the return value is +@code{-1}. +@end deftypefun + +@node Reading/Closing Directory +@subsection Reading and Closing a Directory Stream + +@pindex dirent.h +This section describes how to read directory entries from a directory +stream, and how to close the stream when you are done with it. All the +symbols are declared in the header file @file{dirent.h}. 
+ +@comment dirent.h +@comment POSIX.1 +@deftypefun {struct dirent *} readdir (DIR *@var{dirstream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c This function holds dirstream's non-recursive lock, which brings +@c about the usual issues with locks and async signals and cancellation, +@c but the lock taking is not enough to make the returned value safe to +@c use, since it points to a stream's internal buffer that can be +@c overwritten by subsequent calls or even released by closedir. +This function reads the next entry from the directory. It normally +returns a pointer to a structure containing information about the +file. This structure is associated with the @var{dirstream} handle +and can be rewritten by a subsequent call. + +@strong{Portability Note:} On some systems @code{readdir} may not +return entries for @file{.} and @file{..}, even though these are always +valid file names in any directory. @xref{File Name Resolution}. + +If there are no more entries in the directory or an error is detected, +@code{readdir} returns a null pointer. The following @code{errno} error +conditions are defined for this function: + +@table @code +@item EBADF +The @var{dirstream} argument is not valid. +@end table + +To distinguish between an end-of-directory condition or an error, you +must set @code{errno} to zero before calling @code{readdir}. To avoid +entering an infinite loop, you should stop reading from the directory +after the first error. + +@strong{Caution:} The pointer returned by @code{readdir} points to +a buffer within the @code{DIR} object. The data in that buffer will +be overwritten by the next call to @code{readdir}. You must take care, +for instance, to copy the @code{d_name} string if you need it later. 
+ +Because of this, it is not safe to share a @code{DIR} object among +multiple threads, unless you use your own locking to ensure that +no thread calls @code{readdir} while another thread is still using the +data from the previous call. In @theglibc{}, it is safe to call +@code{readdir} from multiple threads as long as each thread uses +its own @code{DIR} object. POSIX.1-2008 does not require this to +be safe, but we are not aware of any operating systems where it +does not work. + +@code{readdir_r} allows you to provide your own buffer for the +@code{struct dirent}, but it is less portable than @code{readdir}, and +has problems with very long filenames (see below). We recommend +you use @code{readdir}, but do not share @code{DIR} objects. +@end deftypefun + +@comment dirent.h +@comment GNU +@deftypefun int readdir_r (DIR *@var{dirstream}, struct dirent *@var{entry}, struct dirent **@var{result}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +This function is a version of @code{readdir} which performs internal +locking. Like @code{readdir} it returns the next entry from the +directory. To prevent conflicts between simultaneously running +threads the result is stored inside the @var{entry} object. + +@strong{Portability Note:} @code{readdir_r} is deprecated. It is +recommended to use @code{readdir} instead of @code{readdir_r} for the +following reasons: + +@itemize @bullet +@item +On systems which do not define @code{NAME_MAX}, it may not be possible +to use @code{readdir_r} safely because the caller does not specify the +length of the buffer for the directory entry. + +@item +On some systems, @code{readdir_r} cannot read directory entries with +very long names. If such a name is encountered, @theglibc{} +implementation of @code{readdir_r} returns with an error code of +@code{ENAMETOOLONG} after the final directory entry has been read. 
On +other systems, @code{readdir_r} may return successfully, but the +@code{d_name} member may not be NUL-terminated or may be truncated. + +@item +POSIX.1-2008 does not guarantee that @code{readdir} is thread-safe, +even when access to the same @var{dirstream} is serialized. But in +current implementations (including @theglibc{}), it is safe to call +@code{readdir} concurrently on different @var{dirstream}s, so there is +no need to use @code{readdir_r} in most multi-threaded programs. In +the rare case that multiple threads need to read from the same +@var{dirstream}, it is still better to use @code{readdir} and external +synchronization. + +@item +It is expected that future versions of POSIX will obsolete +@code{readdir_r} and mandate the level of thread safety for +@code{readdir} which is provided by @theglibc{} and other +implementations today. +@end itemize + +Normally @code{readdir_r} returns zero and sets @code{*@var{result}} +to @var{entry}. If there are no more entries in the directory or an +error is detected, @code{readdir_r} sets @code{*@var{result}} to a +null pointer and returns a nonzero error code, also stored in +@code{errno}, as described for @code{readdir}. + +It is also important to look at the definition of the @code{struct +dirent} type. Simply passing a pointer to an object of this type for +the second parameter of @code{readdir_r} might not be enough. Some +systems don't define the @code{d_name} element sufficiently long. In +this case the user has to provide additional space. There must be room +for at least @code{NAME_MAX + 1} characters in the @code{d_name} array. +Code to call @code{readdir_r} could look like this: + +@smallexample + union + @{ + struct dirent d; + char b[offsetof (struct dirent, d_name) + NAME_MAX + 1]; + @} u; + + if (readdir_r (dir, &u.d, &res) == 0) + @dots{} +@end smallexample +@end deftypefun + +To support large filesystems on 32-bit machines there are LFS variants +of the last two functions.
+ +@comment dirent.h +@comment LFS +@deftypefun {struct dirent64 *} readdir64 (DIR *@var{dirstream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +The @code{readdir64} function is just like the @code{readdir} function +except that it returns a pointer to a record of type @code{struct +dirent64}. Some of the members of this data type (notably @code{d_ino}) +might have a different size to allow large filesystems. + +In all other aspects this function is equivalent to @code{readdir}. +@end deftypefun + +@comment dirent.h +@comment LFS +@deftypefun int readdir64_r (DIR *@var{dirstream}, struct dirent64 *@var{entry}, struct dirent64 **@var{result}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +The deprecated @code{readdir64_r} function is equivalent to the +@code{readdir_r} function except that it takes parameters of base type +@code{struct dirent64} instead of @code{struct dirent} in the second and +third position. The same precautions mentioned in the documentation of +@code{readdir_r} also apply here. +@end deftypefun + +@comment dirent.h +@comment POSIX.1 +@deftypefun int closedir (DIR *@var{dirstream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{/hurd}}@acunsafe{@acsmem{} @acsfd{} @aculock{/hurd}}} +@c No synchronization in the posix implementation, only in the hurd +@c one. This is regarded as safe because it is undefined behavior if +@c other threads could still be using the dir stream while it's closed. +This function closes the directory stream @var{dirstream}. It returns +@code{0} on success and @code{-1} on failure. + +The following @code{errno} error conditions are defined for this +function: + +@table @code +@item EBADF +The @var{dirstream} argument is not valid. 
+@end table +@end deftypefun + +@node Simple Directory Lister +@subsection Simple Program to List a Directory + +Here's a simple program that prints the names of the files in +the current working directory: + +@smallexample +@include dir.c.texi +@end smallexample + +The order in which files appear in a directory tends to be fairly +random. A more useful program would sort the entries (perhaps by +alphabetizing them) before printing them; see +@ref{Scanning Directory Content}, and @ref{Array Sort Function}. + + +@node Random Access Directory +@subsection Random Access in a Directory Stream + +@pindex dirent.h +This section describes how to reread parts of a directory that you have +already read from an open directory stream. All the symbols are +declared in the header file @file{dirent.h}. + +@comment dirent.h +@comment POSIX.1 +@deftypefun void rewinddir (DIR *@var{dirstream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +The @code{rewinddir} function is used to reinitialize the directory +stream @var{dirstream}, so that if you call @code{readdir} it +returns information about the first entry in the directory again. This +function also notices if files have been added to or removed from the +directory since it was opened with @code{opendir}. (Entries for these +files might or might not be returned by @code{readdir} if they were +added or removed since you last called @code{opendir} or +@code{rewinddir}.) +@end deftypefun + +@comment dirent.h +@comment BSD +@deftypefun {long int} telldir (DIR *@var{dirstream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{/bsd} @asulock{/bsd}}@acunsafe{@acsmem{/bsd} @aculock{/bsd}}} +@c The implementation is safe on most platforms, but on BSD it uses +@c cookies, buckets and records, and the global array of pointers to +@c dynamically allocated records is guarded by a non-recursive lock. +The @code{telldir} function returns the file position of the directory +stream @var{dirstream}.
You can use this value with @code{seekdir} to +restore the directory stream to that position. +@end deftypefun + +@comment dirent.h +@comment BSD +@deftypefun void seekdir (DIR *@var{dirstream}, long int @var{pos}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{/bsd} @asulock{/bsd}}@acunsafe{@acsmem{/bsd} @aculock{/bsd}}} +@c The implementation is safe on most platforms, but on BSD it uses +@c cookies, buckets and records, and the global array of pointers to +@c dynamically allocated records is guarded by a non-recursive lock. +The @code{seekdir} function sets the file position of the directory +stream @var{dirstream} to @var{pos}. The value @var{pos} must be the +result of a previous call to @code{telldir} on this particular stream; +closing and reopening the directory can invalidate values returned by +@code{telldir}. +@end deftypefun + + +@node Scanning Directory Content +@subsection Scanning the Content of a Directory + +A higher-level interface to the directory handling functions is the +@code{scandir} function. With its help one can select a subset of the +entries in a directory, possibly sort them and get a list of names as +the result. + +@comment dirent.h +@comment BSD, SVID +@deftypefun int scandir (const char *@var{dir}, struct dirent ***@var{namelist}, int (*@var{selector}) (const struct dirent *), int (*@var{cmp}) (const struct dirent **, const struct dirent **)) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c The scandir function calls __opendirat, __readdir, and __closedir to +@c go over the named dir; malloc and realloc to allocate the namelist +@c and copies of each selected dirent, besides the selector, if given, +@c and qsort and the cmp functions if the latter is given. In spite of +@c the cleanup handler that releases memory and the file descriptor in +@c case of synchronous cancellation, an asynchronous cancellation may +@c still leak memory and a file descriptor. 
Although readdir is unsafe +@c in general, the use of an internal dir stream for sequential scanning +@c of the directory with copying of dirents before subsequent calls +@c makes the use safe, and the fact that the dir stream is private to +@c each scandir call does away with the lock issues in readdir and +@c closedir. + +The @code{scandir} function scans the contents of the directory selected +by @var{dir}. The result in *@var{namelist} is an array of pointers to +structures of type @code{struct dirent} which describe all selected +directory entries and which is allocated using @code{malloc}. Instead +of always getting all directory entries returned, the user supplied +function @var{selector} can be used to decide which entries are in the +result. Only the entries for which @var{selector} returns a non-zero +value are selected. + +Finally the entries in *@var{namelist} are sorted using the +user-supplied function @var{cmp}. The arguments passed to the @var{cmp} +function are of type @code{struct dirent **}, therefore one cannot +directly use the @code{strcmp} or @code{strcoll} functions; instead see +the functions @code{alphasort} and @code{versionsort} below. + +The return value of the function is the number of entries placed in +*@var{namelist}. If it is @code{-1} an error occurred (either the +directory could not be opened for reading or the malloc call failed) and +the global variable @code{errno} contains more information on the error. +@end deftypefun + +As described above, the fourth argument to the @code{scandir} function +must be a pointer to a sorting function. For the convenience of the +programmer @theglibc{} contains implementations of functions which +are very helpful for this purpose. + +@comment dirent.h +@comment BSD, SVID +@deftypefun int alphasort (const struct dirent **@var{a}, const struct dirent **@var{b}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Calls strcoll. 
+The @code{alphasort} function behaves like the @code{strcoll} function +(@pxref{String/Array Comparison}). The difference is that the arguments +are not string pointers but instead they are of type +@code{struct dirent **}. + +The return value of @code{alphasort} is less than, equal to, or greater +than zero depending on the order of the two entries @var{a} and @var{b}. +@end deftypefun + +@comment dirent.h +@comment GNU +@deftypefun int versionsort (const struct dirent **@var{a}, const struct dirent **@var{b}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Calls strverscmp, which accesses the locale object multiple +@c times. +The @code{versionsort} function is like @code{alphasort} except that it +uses the @code{strverscmp} function internally. +@end deftypefun + +If the filesystem supports large files we cannot use the @code{scandir} +function anymore since the @code{dirent} structure might not be able to contain all +the information. The LFS provides the new type @w{@code{struct +dirent64}}. To use this we need a new function. + +@comment dirent.h +@comment GNU +@deftypefun int scandir64 (const char *@var{dir}, struct dirent64 ***@var{namelist}, int (*@var{selector}) (const struct dirent64 *), int (*@var{cmp}) (const struct dirent64 **, const struct dirent64 **)) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c See scandir. +The @code{scandir64} function works like the @code{scandir} function +except that the directory entries it returns are described by elements +of type @w{@code{struct dirent64}}. The function pointed to by +@var{selector} is again used to select the desired entries, except that +@var{selector} now must point to a function which takes a +@w{@code{struct dirent64 *}} parameter. + +Similarly the @var{cmp} function should expect its two arguments to be +of type @code{struct dirent64 **}.
+@end deftypefun + +As @var{cmp} is now a function of a different type, the functions +@code{alphasort} and @code{versionsort} cannot be supplied for that +argument. Instead we provide the two replacement functions below. + +@comment dirent.h +@comment GNU +@deftypefun int alphasort64 (const struct dirent64 **@var{a}, const struct dirent64 **@var{b}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c See alphasort. +The @code{alphasort64} function behaves like the @code{strcoll} function +(@pxref{String/Array Comparison}). The difference is that the arguments +are not string pointers but instead they are of type +@code{struct dirent64 **}. + +The return value of @code{alphasort64} is less than, equal to, or greater +than zero depending on the order of the two entries @var{a} and @var{b}. +@end deftypefun + +@comment dirent.h +@comment GNU +@deftypefun int versionsort64 (const struct dirent64 **@var{a}, const struct dirent64 **@var{b}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c See versionsort. +The @code{versionsort64} function is like @code{alphasort64}, except that it +uses the @code{strverscmp} function internally. +@end deftypefun + +It is important not to mix the use of @code{scandir} and the 64-bit +comparison functions or vice versa. There are systems on which this +works but on others it will fail miserably. + +@node Simple Directory Lister Mark II +@subsection Simple Program to List a Directory, Mark II + +Here is a revised version of the directory lister found above +(@pxref{Simple Directory Lister}). Using the @code{scandir} function we +can avoid the functions which work directly with the directory contents. +After the call the returned entries are available for direct use. + +@smallexample +@include dir2.c.texi +@end smallexample + +Note the simple selector function in this example. Since we want to see +all directory entries we always return @code{1}.
+ + +@node Working with Directory Trees +@section Working with Directory Trees +@cindex directory hierarchy +@cindex hierarchy, directory +@cindex tree, directory + +The functions described so far for handling the files in a directory +have allowed you to either retrieve the information bit by bit, or to +process all the files as a group (see @code{scandir}). Sometimes it is +useful to process whole hierarchies of directories and their contained +files. The X/Open specification defines two functions to do this. The +simpler form is derived from an early definition in @w{System V} systems +and therefore this function is available on SVID-derived systems. The +prototypes and required definitions can be found in the @file{ftw.h} +header. + +There are four functions in this family: @code{ftw}, @code{nftw} and +their 64-bit counterparts @code{ftw64} and @code{nftw64}. These +functions take as one of their arguments a pointer to a callback +function of the appropriate type. + +@comment ftw.h +@comment GNU +@deftp {Data Type} __ftw_func_t + +@smallexample +int (*) (const char *, const struct stat *, int) +@end smallexample + +The type of callback functions given to the @code{ftw} function. The +first parameter points to the file name, the second parameter to an +object of type @code{struct stat} which is filled in for the file named +in the first parameter. + +@noindent +The last parameter is a flag giving more information about the current +file. It can have the following values: + +@vtable @code +@item FTW_F +The item is either a normal file or a file which does not fit into one +of the following categories. This could be special files, sockets etc. +@item FTW_D +The item is a directory. +@item FTW_NS +The @code{stat} call failed and so the information pointed to by the +second parameter is invalid. +@item FTW_DNR +The item is a directory which cannot be read. +@item FTW_SL +The item is a symbolic link. 
Since symbolic links are normally followed +seeing this value in a @code{ftw} callback function means the referenced +file does not exist. The situation for @code{nftw} is different. + +This value is only available if the program is compiled with +@code{_XOPEN_EXTENDED} defined before including +the first header. The original SVID systems do not have symbolic links. +@end vtable + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +type is in fact @code{__ftw64_func_t} since this mode changes +@code{struct stat} to be @code{struct stat64}. +@end deftp + +For the LFS interface and for use in the function @code{ftw64}, the +header @file{ftw.h} defines another function type. + +@comment ftw.h +@comment GNU +@deftp {Data Type} __ftw64_func_t + +@smallexample +int (*) (const char *, const struct stat64 *, int) +@end smallexample + +This type is used just like @code{__ftw_func_t} for the callback +function, but this time is called from @code{ftw64}. The second +parameter to the function is a pointer to a variable of type +@code{struct stat64} which is able to represent the larger values. +@end deftp + +@comment ftw.h +@comment GNU +@deftp {Data Type} __nftw_func_t + +@smallexample +int (*) (const char *, const struct stat *, int, struct FTW *) +@end smallexample + +The first three arguments are the same as for the @code{__ftw_func_t} +type. However for the third argument some additional values are defined +to allow finer differentiation: +@vtable @code +@item FTW_DP +The current item is a directory and all subdirectories have already been +visited and reported. This flag is returned instead of @code{FTW_D} if +the @code{FTW_DEPTH} flag is passed to @code{nftw} (see below). +@item FTW_SLN +The current item is a stale symbolic link. The file it points to does +not exist. +@end vtable + +The last parameter of the callback function is a pointer to a structure +with some extra information as described below. 
+ +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +type is in fact @code{__nftw64_func_t} since this mode changes +@code{struct stat} to be @code{struct stat64}. +@end deftp + +For the LFS interface there is also a variant of this data type +available which has to be used with the @code{nftw64} function. + +@comment ftw.h +@comment GNU +@deftp {Data Type} __nftw64_func_t + +@smallexample +int (*) (const char *, const struct stat64 *, int, struct FTW *) +@end smallexample + +This type is used just like @code{__nftw_func_t} for the callback +function, but this time is called from @code{nftw64}. The second +parameter to the function is this time a pointer to a variable of type +@code{struct stat64} which is able to represent the larger values. +@end deftp + +@comment ftw.h +@comment XPG4.2 +@deftp {Data Type} {struct FTW} +The information contained in this structure helps in interpreting the +name parameter and gives some information about the current state of the +traversal of the directory hierarchy. + +@table @code +@item int base +The value is the offset into the string passed in the first parameter to +the callback function of the beginning of the file name. The rest of +the string is the path of the file. This information is especially +important if the @code{FTW_CHDIR} flag was set in calling @code{nftw} +since then the current directory is the one the current item is found +in. +@item int level +Whilst processing, the code tracks how many directories down it has gone +to find the current file. This nesting level starts at @math{0} for +files in the initial directory (or is zero for the initial file if a +file was passed). 
+@end table +@end deftp + + +@comment ftw.h +@comment SVID +@deftypefun int ftw (const char *@var{filename}, __ftw_func_t @var{func}, int @var{descriptors}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c see nftw for safety details +The @code{ftw} function calls the callback function given in the +parameter @var{func} for every item which is found in the directory +specified by @var{filename} and all directories below. The function +follows symbolic links if necessary but does not process an item twice. +If @var{filename} is not a directory then it itself is the only object +returned to the callback function. + +The file name passed to the callback function is constructed by taking +the @var{filename} parameter and appending the names of all passed +directories and then the local file name. So the callback function can +use this parameter to access the file. @code{ftw} also calls +@code{stat} for the file and passes that information on to the callback +function. If this @code{stat} call is not successful the failure is +indicated by setting the third argument of the callback function to +@code{FTW_NS}. Otherwise it is set according to the description given +in the account of @code{__ftw_func_t} above. + +The callback function is expected to return @math{0} to indicate that no +error occurred and that processing should continue. If an error +occurred in the callback function or it wants @code{ftw} to return +immediately, the callback function can return a value other than +@math{0}. This is the only correct way to stop the function. The +program must not use @code{setjmp} or similar techniques to continue +from another place. This would leave resources allocated by the +@code{ftw} function unfreed. + +The @var{descriptors} parameter to @code{ftw} specifies how many file +descriptors it is allowed to consume. The function runs faster the more +descriptors it can use. 
For each level in the directory hierarchy at +most one descriptor is used, but for very deep ones any limit on open +file descriptors for the process or the system may be exceeded. +Moreover, file descriptor limits in a multi-threaded program apply to +all the threads as a group, and therefore it is a good idea to supply a +reasonable limit to the number of open descriptors. + +The return value of the @code{ftw} function is @math{0} if all callback +function calls returned @math{0} and all actions performed by the +@code{ftw} succeeded. If a function call failed (other than calling +@code{stat} on an item) the function returns @math{-1}. If a callback +function returns a value other than @math{0} this value is returned as +the return value of @code{ftw}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is in fact @code{ftw64}, i.e., the LFS +interface transparently replaces the old interface. +@end deftypefun + +@comment ftw.h +@comment Unix98 +@deftypefun int ftw64 (const char *@var{filename}, __ftw64_func_t @var{func}, int @var{descriptors}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +This function is similar to @code{ftw} but it can work on filesystems +with large files. File information is reported using a variable of type +@code{struct stat64} which is passed by reference to the callback +function. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is available under the name @code{ftw} and +transparently replaces the old implementation. 
+@end deftypefun + +@comment ftw.h +@comment XPG4.2 +@deftypefun int nftw (const char *@var{filename}, __nftw_func_t @var{func}, int @var{descriptors}, int @var{flag}) +@safety{@prelim{}@mtsafe{@mtasscwd{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{} @acscwd{}}} +@c ftw_startup calls alloca, malloc, free, xstat/lxstat, tdestroy, and ftw_dir +@c if FTW_CHDIR, call open, and fchdir, or chdir and getcwd +@c ftw_dir calls open_dir_stream, readdir64, process_entry, closedir +@c if FTW_CHDIR, also calls fchdir +@c open_dir_stream calls malloc, realloc, readdir64, free, closedir, +@c then openat64_not_cancel_3 and fdopendir or opendir, then dirfd. +@c process_entry may call realloc, fxstatat/lxstat/xstat, ftw_dir, and +@c find_object (tsearch) and add_object (tfind). +@c Since each invocation of *ftw uses its own private search tree, none +@c of the search tree concurrency issues apply. +The @code{nftw} function works like the @code{ftw} functions. They call +the callback function @var{func} for all items found in the directory +@var{filename} and below. At most @var{descriptors} file descriptors +are consumed during the @code{nftw} call. + +One difference is that the callback function is of a different type. It +takes an additional argument of type @w{@code{struct FTW *}}, which +provides the callback function with the extra information described +above. + +A second difference is that @code{nftw} takes a fourth argument, which +is @math{0} or a bitwise-OR combination of any of the following values. + +@vtable @code +@item FTW_PHYS +While traversing the directory symbolic links are not followed. Instead +symbolic links are reported using the @code{FTW_SL} value for the type +parameter to the callback function. If the file referenced by a +symbolic link does not exist @code{FTW_SLN} is returned instead. +@item FTW_MOUNT +The callback function is only called for items which are on the same +mounted filesystem as the directory given by the @var{filename} +parameter to @code{nftw}.
+@item FTW_CHDIR +If this flag is given the current working directory is changed to the +directory of the reported object before the callback function is called. +When @code{nftw} finally returns the current directory is restored to +its original value. +@item FTW_DEPTH +If this option is specified then all subdirectories and files within +them are processed before processing the top directory itself +(depth-first processing). This also means the type flag given to the +callback function is @code{FTW_DP} and not @code{FTW_D}. +@item FTW_ACTIONRETVAL +If this option is specified then return values from callbacks +are handled differently. If the callback returns @code{FTW_CONTINUE}, +walking continues normally. @code{FTW_STOP} means walking stops +and @code{FTW_STOP} is returned to the caller. If @code{FTW_SKIP_SUBTREE} +is returned by the callback with @code{FTW_D} argument, the subtree +is skipped and walking continues with the next sibling of the directory. +If @code{FTW_SKIP_SIBLINGS} is returned by the callback, all siblings +of the current entry are skipped and walking continues in its parent. +No other return values should be returned from the callbacks if +this option is set. This option is a GNU extension. +@end vtable + +The return value is computed in the same way as for @code{ftw}. +@code{nftw} returns @math{0} if no failures occurred and all callback +functions returned @math{0}. In case of internal errors, such as memory +problems, the return value is @math{-1} and @code{errno} is set +accordingly. If the return value of a callback invocation was non-zero +then that value is returned. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is in fact @code{nftw64}, i.e., the LFS +interface transparently replaces the old interface.
+@end deftypefun + +@comment ftw.h +@comment Unix98 +@deftypefun int nftw64 (const char *@var{filename}, __nftw64_func_t @var{func}, int @var{descriptors}, int @var{flag}) +@safety{@prelim{}@mtsafe{@mtasscwd{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{} @acscwd{}}} +This function is similar to @code{nftw} but it can work on filesystems +with large files. File information is reported using a variable of type +@code{struct stat64} which is passed by reference to the callback +function. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is available under the name @code{nftw} and +transparently replaces the old implementation. +@end deftypefun + + +@node Hard Links +@section Hard Links +@cindex hard link +@cindex link, hard +@cindex multiple names for one file +@cindex file names, multiple + +In POSIX systems, one file can have many names at the same time. All of +the names are equally real, and no one of them is preferred to the +others. + +To add a name to a file, use the @code{link} function. (The new name is +also called a @dfn{hard link} to the file.) Creating a new link to a +file does not copy the contents of the file; it simply makes a new name +by which the file can be known, in addition to the file's existing name +or names. + +One file can have names in several directories, so the organization +of the file system is not a strict hierarchy or tree. + +In most implementations, it is not possible to have hard links to the +same file in multiple file systems. @code{link} reports an error if you +try to make a hard link to the file from another file system when this +cannot be done. + +The prototype for the @code{link} function is declared in the header +file @file{unistd.h}. 
+@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun int link (const char *@var{oldname}, const char *@var{newname}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{link} function makes a new link to the existing file named by +@var{oldname}, under the new name @var{newname}. + +This function returns a value of @code{0} if it is successful and +@code{-1} on failure. In addition to the usual file name errors +(@pxref{File Name Errors}) for both @var{oldname} and @var{newname}, the +following @code{errno} error conditions are defined for this function: + +@table @code +@item EACCES +You are not allowed to write to the directory in which the new link is +to be written. +@ignore +Some implementations also require that the existing file be accessible +by the caller, and use this error to report failure for that reason. +@end ignore + +@item EEXIST +There is already a file named @var{newname}. If you want to replace +this link with a new link, you must remove the old link explicitly first. + +@item EMLINK +There are already too many links to the file named by @var{oldname}. +(The maximum number of links to a file is @w{@code{LINK_MAX}}; see +@ref{Limits for Files}.) + +@item ENOENT +The file named by @var{oldname} doesn't exist. You can't make a link to +a file that doesn't exist. + +@item ENOSPC +The directory or file system that would contain the new link is full +and cannot be extended. + +@item EPERM +On @gnulinuxhurdsystems{} and some others, you cannot make links to +directories. +Many systems allow only privileged users to do so. This error +is used to report the problem. + +@item EROFS +The directory containing the new link can't be modified because it's on +a read-only file system. + +@item EXDEV +The directory specified in @var{newname} is on a different file system +than the existing file. + +@item EIO +A hardware error occurred while trying to read or write to the filesystem.
+@end table +@end deftypefun + +@node Symbolic Links +@section Symbolic Links +@cindex soft link +@cindex link, soft +@cindex symbolic link +@cindex link, symbolic + +@gnusystems{} support @dfn{soft links} or @dfn{symbolic links}. This +is a kind of ``file'' that is essentially a pointer to another file +name. Unlike hard links, symbolic links can be made to directories or +across file systems with no restrictions. You can also make a symbolic +link to a name which is not the name of any file. (Opening this link +will fail until a file by that name is created.) Likewise, if the +symbolic link points to an existing file which is later deleted, the +symbolic link continues to point to the same file name even though the +name no longer names any file. + +The reason symbolic links work the way they do is that special things +happen when you try to open the link. The @code{open} function realizes +you have specified the name of a link, reads the file name contained in +the link, and opens that file name instead. The @code{stat} function +likewise operates on the file that the symbolic link points to, instead +of on the link itself. + +By contrast, other operations such as deleting or renaming the file +operate on the link itself. The functions @code{readlink} and +@code{lstat} also refrain from following symbolic links, because their +purpose is to obtain information about the link. @code{link}, the +function that makes a hard link, does too. It makes a hard link to the +symbolic link, which one rarely wants. + +Some systems have, for some functions operating on files, a limit on +how many symbolic links are followed when resolving a path name. The +limit if it exists is published in the @file{sys/param.h} header file. + +@comment sys/param.h +@comment BSD +@deftypevr Macro int MAXSYMLINKS + +The macro @code{MAXSYMLINKS} specifies how many symlinks some function +will follow before returning @code{ELOOP}. 
Not all functions behave the +same and this value is not the same as that returned for +@code{_SC_SYMLOOP} by @code{sysconf}. In fact, the @code{sysconf} +result can indicate that there is no fixed limit although +@code{MAXSYMLINKS} exists and has a finite value. +@end deftypevr + +Prototypes for most of the functions listed in this section are in +@file{unistd.h}. +@pindex unistd.h + +@comment unistd.h +@comment BSD +@deftypefun int symlink (const char *@var{oldname}, const char *@var{newname}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{symlink} function makes a symbolic link to @var{oldname} named +@var{newname}. + +The normal return value from @code{symlink} is @code{0}. A return value +of @code{-1} indicates an error. In addition to the usual file name +syntax errors (@pxref{File Name Errors}), the following @code{errno} +error conditions are defined for this function: + +@table @code +@item EEXIST +There is already an existing file named @var{newname}. + +@item EROFS +The file @var{newname} would exist on a read-only file system. + +@item ENOSPC +The directory or file system cannot be extended to make the new link. + +@item EIO +A hardware error occurred while reading or writing data on the disk. + +@comment not sure about these +@ignore +@item ELOOP +There are too many levels of indirection. This can be the result of +circular symbolic links to directories. + +@item EDQUOT +The new link can't be created because the user's disk quota has been +exceeded. +@end ignore +@end table +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun ssize_t readlink (const char *@var{filename}, char *@var{buffer}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{readlink} function gets the value of the symbolic link +@var{filename}. The file name that the link points to is copied into +@var{buffer}. This file name string is @emph{not} null-terminated; +@code{readlink} normally returns the number of characters copied. 
The +@var{size} argument specifies the maximum number of characters to copy, +usually the allocation size of @var{buffer}. + +If the return value equals @var{size}, you cannot tell whether or not +there was room to return the entire name. So make a bigger buffer and +call @code{readlink} again. Here is an example: + +@smallexample +char * +readlink_malloc (const char *filename) +@{ + int size = 100; + char *buffer = NULL; + + while (1) + @{ + buffer = (char *) xrealloc (buffer, size); + int nchars = readlink (filename, buffer, size); + if (nchars < 0) + @{ + free (buffer); + return NULL; + @} + if (nchars < size) + return buffer; + size *= 2; + @} +@} +@end smallexample + +@c @group Invalid outside example. +A value of @code{-1} is returned in case of error. In addition to the +usual file name errors (@pxref{File Name Errors}), the following +@code{errno} error conditions are defined for this function: + +@table @code +@item EINVAL +The named file is not a symbolic link. + +@item EIO +A hardware error occurred while reading or writing data on the disk. +@end table +@c @end group +@end deftypefun + +In some situations it is desirable to resolve all the +symbolic links to get the real +name of a file where no prefix names a symbolic link which is followed +and no filename in the path is @code{.} or @code{..}. This is for +instance desirable if files have to be compared in which case different +names can refer to the same inode. + +@comment stdlib.h +@comment GNU +@deftypefun {char *} canonicalize_file_name (const char *@var{name}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c Calls realpath. + +The @code{canonicalize_file_name} function returns the absolute name of +the file named by @var{name} which contains no @code{.}, @code{..} +components nor any repeated path separators (@code{/}) or symlinks. The +result is passed back as the return value of the function in a block of +memory allocated with @code{malloc}. 
If the result is not used anymore +the memory should be freed with a call to @code{free}. + +If any of the path components are missing the function returns a NULL +pointer. This is also what is returned if the length of the path +reaches or exceeds @code{PATH_MAX} characters. In any case +@code{errno} is set accordingly. + +@table @code +@item ENAMETOOLONG +The resulting path is too long. This error only occurs on systems which +have a limit on the file name length. + +@item EACCES +At least one of the path components is not readable. + +@item ENOENT +The input file name is empty. + +@item ENOENT +At least one of the path components does not exist. + +@item ELOOP +More than @code{MAXSYMLINKS} symlinks have been followed. +@end table + +This function is a GNU extension and is declared in @file{stdlib.h}. +@end deftypefun + +The Unix standard includes a similar function which differs from +@code{canonicalize_file_name} in that the user has to provide the buffer +in which the result is placed. + +@comment stdlib.h +@comment XPG +@deftypefun {char *} realpath (const char *restrict @var{name}, char *restrict @var{resolved}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c Calls malloc, realloc, getcwd, lxstat64, readlink, alloca. + +A call to @code{realpath} where the @var{resolved} parameter is +@code{NULL} behaves exactly like @code{canonicalize_file_name}. The +function allocates a buffer for the file name and returns a pointer to +it. If @var{resolved} is not @code{NULL} it points to a buffer into +which the result is copied. It is the caller's responsibility to +allocate a buffer which is large enough. On systems which define +@code{PATH_MAX} this means the buffer must be large enough for a +pathname of this size. For systems without limitations on the pathname +length the requirement cannot be met and programs should not call +@code{realpath} with anything but @code{NULL} for the second parameter.
+ +One other difference is that the buffer @var{resolved} (if nonzero) will +contain the part of the path component which does not exist or is not +readable if the function returns @code{NULL} and @code{errno} is set to +@code{EACCES} or @code{ENOENT}. + +This function is declared in @file{stdlib.h}. +@end deftypefun + +The advantage of using this function is that it is more widely +available. The drawback is that it reports failures for long paths on +systems which have no limits on the file name length. + +@node Deleting Files +@section Deleting Files +@cindex deleting a file +@cindex removing a file +@cindex unlinking a file + +You can delete a file with @code{unlink} or @code{remove}. + +Deletion actually deletes a file name. If this is the file's only name, +then the file is deleted as well. If the file has other remaining names +(@pxref{Hard Links}), it remains accessible under those names. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int unlink (const char *@var{filename}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{unlink} function deletes the file name @var{filename}. If +this is a file's sole name, the file itself is also deleted. (Actually, +if any process has the file open when this happens, deletion is +postponed until all processes have closed the file.) + +@pindex unistd.h +The function @code{unlink} is declared in the header file @file{unistd.h}. + +This function returns @code{0} on successful completion, and @code{-1} +on error. In addition to the usual file name errors +(@pxref{File Name Errors}), the following @code{errno} error conditions are +defined for this function: + +@table @code +@item EACCES +Write permission is denied for the directory from which the file is to be +removed, or the directory has the sticky bit set and you do not own the file. + +@item EBUSY +This error indicates that the file is being used by the system in such a +way that it can't be unlinked. 
For example, you might see this error if +the file name specifies the root directory or a mount point for a file +system. + +@item ENOENT +The file name to be deleted doesn't exist. + +@item EPERM +On some systems @code{unlink} cannot be used to delete the name of a +directory, or at least can only be used this way by a privileged user. +To avoid such problems, use @code{rmdir} to delete directories. (On +@gnulinuxhurdsystems{} @code{unlink} can never delete the name of a directory.) + +@item EROFS +The directory containing the file name to be deleted is on a read-only +file system and can't be modified. +@end table +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int rmdir (const char *@var{filename}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@cindex directories, deleting +@cindex deleting a directory +The @code{rmdir} function deletes a directory. The directory must be +empty before it can be removed; in other words, it can only contain +entries for @file{.} and @file{..}. + +In most other respects, @code{rmdir} behaves like @code{unlink}. There +are two additional @code{errno} error conditions defined for +@code{rmdir}: + +@table @code +@item ENOTEMPTY +@itemx EEXIST +The directory to be deleted is not empty. +@end table + +These two error codes are synonymous; some systems use one, and some use +the other. @gnulinuxhurdsystems{} always use @code{ENOTEMPTY}. + +The prototype for this function is declared in the header file +@file{unistd.h}. +@pindex unistd.h +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int remove (const char *@var{filename}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Calls unlink and rmdir. +This is the @w{ISO C} function to remove a file. It works like +@code{unlink} for files and like @code{rmdir} for directories. +@code{remove} is declared in @file{stdio.h}. 
+@pindex stdio.h +@end deftypefun + +@node Renaming Files +@section Renaming Files + +The @code{rename} function is used to change a file's name. + +@cindex renaming a file +@comment stdio.h +@comment ISO +@deftypefun int rename (const char *@var{oldname}, const char *@var{newname}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c In the absence of a rename syscall, there's an emulation with link +@c and unlink, but it's racy, even more so if newname exists and is +@c unlinked first. +The @code{rename} function renames the file @var{oldname} to +@var{newname}. The file formerly accessible under the name +@var{oldname} is afterwards accessible as @var{newname} instead. (If +the file had any other names aside from @var{oldname}, it continues to +have those names.) + +The directory containing the name @var{newname} must be on the same file +system as the directory containing the name @var{oldname}. + +One special case for @code{rename} is when @var{oldname} and +@var{newname} are two names for the same file. The consistent way to +handle this case is to delete @var{oldname}. However, in this case +POSIX requires that @code{rename} do nothing and report success---which +is inconsistent. We don't know what your operating system will do. + +If @var{oldname} is not a directory, then any existing file named +@var{newname} is removed during the renaming operation. However, if +@var{newname} is the name of a directory, @code{rename} fails in this +case. + +If @var{oldname} is a directory, then either @var{newname} must not +exist or it must name a directory that is empty. In the latter case, +the existing directory named @var{newname} is deleted first. The name +@var{newname} must not specify a subdirectory of the directory +@var{oldname} which is being renamed.
+ +One useful feature of @code{rename} is that the meaning of @var{newname} +changes ``atomically'' from any previously existing file by that name to +its new meaning (i.e., the file that was called @var{oldname}). There is +no instant at which @var{newname} is non-existent ``in between'' the old +meaning and the new meaning. If there is a system crash during the +operation, it is possible for both names to still exist; but +@var{newname} will always be intact if it exists at all. + +If @code{rename} fails, it returns @code{-1}. In addition to the usual +file name errors (@pxref{File Name Errors}), the following +@code{errno} error conditions are defined for this function: + +@table @code +@item EACCES +One of the directories containing @var{newname} or @var{oldname} +refuses write permission; or @var{newname} and @var{oldname} are +directories and write permission is refused for one of them. + +@item EBUSY +A directory named by @var{oldname} or @var{newname} is being used by +the system in a way that prevents the renaming from working. This includes +directories that are mount points for filesystems, and directories +that are the current working directories of processes. + +@item ENOTEMPTY +@itemx EEXIST +The directory @var{newname} isn't empty. @gnulinuxhurdsystems{} always return +@code{ENOTEMPTY} for this, but some other systems return @code{EEXIST}. + +@item EINVAL +@var{oldname} is a directory that contains @var{newname}. + +@item EISDIR +@var{newname} is a directory but the @var{oldname} isn't. + +@item EMLINK +The parent directory of @var{newname} would have too many links +(entries). + +@item ENOENT +The file @var{oldname} doesn't exist. + +@item ENOSPC +The directory that would contain @var{newname} has no room for another +entry, and there is no space left in the file system to expand it. + +@item EROFS +The operation would involve writing to a directory on a read-only file +system. 
+ +@item EXDEV +The two file names @var{newname} and @var{oldname} are on different +file systems. +@end table +@end deftypefun + +@node Creating Directories +@section Creating Directories +@cindex creating a directory +@cindex directories, creating + +@pindex mkdir +Directories are created with the @code{mkdir} function. (There is also +a shell command @code{mkdir} which does the same thing.) +@c !!! umask + +@comment sys/stat.h +@comment POSIX.1 +@deftypefun int mkdir (const char *@var{filename}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{mkdir} function creates a new, empty directory with name +@var{filename}. + +The argument @var{mode} specifies the file permissions for the new +directory file. @xref{Permission Bits}, for more information about +this. + +A return value of @code{0} indicates successful completion, and +@code{-1} indicates failure. In addition to the usual file name syntax +errors (@pxref{File Name Errors}), the following @code{errno} error +conditions are defined for this function: + +@table @code +@item EACCES +Write permission is denied for the parent directory in which the new +directory is to be added. + +@item EEXIST +A file named @var{filename} already exists. + +@item EMLINK +The parent directory has too many links (entries). + +Well-designed file systems never report this error, because they permit +more links than your disk could possibly hold. However, you must still +take account of the possibility of this error, as it could result from +network access to a file system on another machine. + +@item ENOSPC +The file system doesn't have enough room to create the new directory. + +@item EROFS +The parent directory of the directory being created is on a read-only +file system and cannot be modified. +@end table + +To use this function, your program should include the header file +@file{sys/stat.h}. 
+@pindex sys/stat.h +@end deftypefun + +@node File Attributes +@section File Attributes + +@pindex ls +When you issue an @samp{ls -l} shell command on a file, it gives you +information about the size of the file, who owns it, when it was last +modified, etc. These are called the @dfn{file attributes}, and are +associated with the file itself and not a particular one of its names. + +This section contains information about how you can inquire about and +modify the attributes of a file. + +@menu +* Attribute Meanings:: The names of the file attributes, + and what their values mean. +* Reading Attributes:: How to read the attributes of a file. +* Testing File Type:: Distinguishing ordinary files, + directories, links@dots{} +* File Owner:: How ownership for new files is determined, + and how to change it. +* Permission Bits:: How information about a file's access + mode is stored. +* Access Permission:: How the system decides who can access a file. +* Setting Permissions:: How permissions for new files are assigned, + and how to change them. +* Testing File Access:: How to find out if your process can + access a file. +* File Times:: About the time attributes of a file. +* File Size:: Manually changing the size of a file. +* Storage Allocation:: Allocate backing storage for files. +@end menu + +@node Attribute Meanings +@subsection The meaning of the File Attributes +@cindex status of a file +@cindex attributes of a file +@cindex file attributes + +When you read the attributes of a file, they come back in a structure +called @code{struct stat}. This section describes the names of the +attributes, their data types, and what they mean. For the functions +to read the attributes of a file, see @ref{Reading Attributes}. + +The header file @file{sys/stat.h} declares all the symbols defined +in this section. 
+@pindex sys/stat.h + +@comment sys/stat.h +@comment POSIX.1 +@deftp {Data Type} {struct stat} +The @code{stat} structure type is used to return information about the +attributes of a file. It contains at least the following members: + +@table @code +@item mode_t st_mode +Specifies the mode of the file. This includes file type information +(@pxref{Testing File Type}) and the file permission bits +(@pxref{Permission Bits}). + +@item ino_t st_ino +The file serial number, which distinguishes this file from all other +files on the same device. + +@item dev_t st_dev +Identifies the device containing the file. The @code{st_ino} and +@code{st_dev}, taken together, uniquely identify the file. The +@code{st_dev} value is not necessarily consistent across reboots or +system crashes, however. + +@item nlink_t st_nlink +The number of hard links to the file. This count keeps track of how +many directories have entries for this file. If the count is ever +decremented to zero, then the file itself is discarded as soon as no +process still holds it open. Symbolic links are not counted in the +total. + +@item uid_t st_uid +The user ID of the file's owner. @xref{File Owner}. + +@item gid_t st_gid +The group ID of the file. @xref{File Owner}. + +@item off_t st_size +This specifies the size of a regular file in bytes. For files that are +really devices this field isn't usually meaningful. For symbolic links +this specifies the length of the file name the link refers to. + +@item time_t st_atime +This is the last access time for the file. @xref{File Times}. + +@item unsigned long int st_atime_usec +This is the fractional part of the last access time for the file. +@xref{File Times}. + +@item time_t st_mtime +This is the time of the last modification to the contents of the file. +@xref{File Times}. + +@item unsigned long int st_mtime_usec +This is the fractional part of the time of the last modification to the +contents of the file. @xref{File Times}. 
+ +@item time_t st_ctime +This is the time of the last modification to the attributes of the file. +@xref{File Times}. + +@item unsigned long int st_ctime_usec +This is the fractional part of the time of the last modification to the +attributes of the file. @xref{File Times}. + +@c !!! st_rdev +@item blkcnt_t st_blocks +This is the amount of disk space that the file occupies, measured in +units of 512-byte blocks. + +The number of disk blocks is not strictly proportional to the size of +the file, for two reasons: the file system may use some blocks for +internal record keeping; and the file may be sparse---it may have +``holes'' which contain zeros but do not actually take up space on the +disk. + +You can tell (approximately) whether a file is sparse by comparing this +value with @code{st_size}, like this: + +@smallexample +(st.st_blocks * 512 < st.st_size) +@end smallexample + +This test is not perfect because a file that is just slightly sparse +might not be detected as sparse at all. For practical applications, +this is not a problem. + +@item unsigned int st_blksize +The optimal block size for reading or writing this file, in bytes. You +might use this size for allocating the buffer space for reading or +writing the file. (This is unrelated to @code{st_blocks}.) +@end table +@end deftp + +The extensions for the Large File Support (LFS) require, even on 32-bit +machines, types which can handle file sizes up to @twoexp{63}. +Therefore a new definition of @code{struct stat} is necessary. + +@comment sys/stat.h +@comment LFS +@deftp {Data Type} {struct stat64} +The members of this type are the same and have the same names as those +in @code{struct stat}. The only difference is that the members +@code{st_ino}, @code{st_size}, and @code{st_blocks} have a different +type to support larger values. + +@table @code +@item mode_t st_mode +Specifies the mode of the file. 
This includes file type information +(@pxref{Testing File Type}) and the file permission bits +(@pxref{Permission Bits}). + +@item ino64_t st_ino +The file serial number, which distinguishes this file from all other +files on the same device. + +@item dev_t st_dev +Identifies the device containing the file. The @code{st_ino} and +@code{st_dev}, taken together, uniquely identify the file. The +@code{st_dev} value is not necessarily consistent across reboots or +system crashes, however. + +@item nlink_t st_nlink +The number of hard links to the file. This count keeps track of how +many directories have entries for this file. If the count is ever +decremented to zero, then the file itself is discarded as soon as no +process still holds it open. Symbolic links are not counted in the +total. + +@item uid_t st_uid +The user ID of the file's owner. @xref{File Owner}. + +@item gid_t st_gid +The group ID of the file. @xref{File Owner}. + +@item off64_t st_size +This specifies the size of a regular file in bytes. For files that are +really devices this field isn't usually meaningful. For symbolic links +this specifies the length of the file name the link refers to. + +@item time_t st_atime +This is the last access time for the file. @xref{File Times}. + +@item unsigned long int st_atime_usec +This is the fractional part of the last access time for the file. +@xref{File Times}. + +@item time_t st_mtime +This is the time of the last modification to the contents of the file. +@xref{File Times}. + +@item unsigned long int st_mtime_usec +This is the fractional part of the time of the last modification to the +contents of the file. @xref{File Times}. + +@item time_t st_ctime +This is the time of the last modification to the attributes of the file. +@xref{File Times}. + +@item unsigned long int st_ctime_usec +This is the fractional part of the time of the last modification to the +attributes of the file. @xref{File Times}. + +@c !!! 
st_rdev +@item blkcnt64_t st_blocks +This is the amount of disk space that the file occupies, measured in +units of 512-byte blocks. + +@item unsigned int st_blksize +The optimal block size for reading or writing this file, in bytes. You +might use this size for allocating the buffer space for reading or +writing the file. (This is unrelated to @code{st_blocks}.) +@end table +@end deftp + +Some of the file attributes have special data type names which exist +specifically for those attributes. (They are all aliases for well-known +integer types that you know and love.) These typedef names are defined +in the header file @file{sys/types.h} as well as in @file{sys/stat.h}. +Here is a list of them. + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} mode_t +This is an integer data type used to represent file modes. In +@theglibc{}, this is an unsigned type no narrower than @code{unsigned +int}. +@end deftp + +@cindex inode number +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} ino_t +This is an unsigned integer type used to represent file serial numbers. +(In Unix jargon, these are sometimes called @dfn{inode numbers}.) +In @theglibc{}, this type is no narrower than @code{unsigned int}. + +If the source is compiled with @code{_FILE_OFFSET_BITS == 64} this type +is transparently replaced by @code{ino64_t}. +@end deftp + +@comment sys/types.h +@comment Unix98 +@deftp {Data Type} ino64_t +This is an unsigned integer type used to represent file serial numbers +for the use in LFS. In @theglibc{}, this type is no narrower than +@code{unsigned int}. + +When compiling with @code{_FILE_OFFSET_BITS == 64} this type is +available under the name @code{ino_t}. +@end deftp + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} dev_t +This is an arithmetic data type used to represent file device numbers. +In @theglibc{}, this is an integer type no narrower than @code{int}.
+@end deftp + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} nlink_t +This is an integer type used to represent file link counts. +@end deftp + +@comment sys/types.h +@comment Unix98 +@deftp {Data Type} blkcnt_t +This is a signed integer type used to represent block counts. +In @theglibc{}, this type is no narrower than @code{int}. + +If the source is compiled with @code{_FILE_OFFSET_BITS == 64} this type +is transparently replaced by @code{blkcnt64_t}. +@end deftp + +@comment sys/types.h +@comment Unix98 +@deftp {Data Type} blkcnt64_t +This is a signed integer type used to represent block counts for the +use in LFS. In @theglibc{}, this type is no narrower than @code{int}. + +When compiling with @code{_FILE_OFFSET_BITS == 64} this type is +available under the name @code{blkcnt_t}. +@end deftp + +@node Reading Attributes +@subsection Reading the Attributes of a File + +To examine the attributes of files, use the functions @code{stat}, +@code{fstat} and @code{lstat}. They return the attribute information in +a @code{struct stat} object. All three functions are declared in the +header file @file{sys/stat.h}. + +@comment sys/stat.h +@comment POSIX.1 +@deftypefun int stat (const char *@var{filename}, struct stat *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{stat} function returns information about the attributes of the +file named by @w{@var{filename}} in the structure pointed to by @var{buf}. + +If @var{filename} is the name of a symbolic link, the attributes you get +describe the file that the link points to. If the link points to a +nonexistent file name, then @code{stat} fails reporting a nonexistent +file. + +The return value is @code{0} if the operation is successful, or +@code{-1} on failure. In addition to the usual file name errors +(@pxref{File Name Errors}), the following @code{errno} error conditions +are defined for this function: + +@table @code +@item ENOENT +The file named by @var{filename} doesn't exist.
+@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{stat64} since the LFS interface transparently +replaces the normal implementation. +@end deftypefun + +@comment sys/stat.h +@comment Unix98 +@deftypefun int stat64 (const char *@var{filename}, struct stat64 *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{stat} but it is also able to work on +files larger than @twoexp{31} bytes on 32-bit systems. To be able to do +this the result is stored in a variable of type @code{struct stat64} to +which @var{buf} must point. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{stat} and so transparently +replaces the interface for small files on 32-bit machines. +@end deftypefun + +@comment sys/stat.h +@comment POSIX.1 +@deftypefun int fstat (int @var{filedes}, struct stat *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fstat} function is like @code{stat}, except that it takes an +open file descriptor as an argument instead of a file name. +@xref{Low-Level I/O}. + +Like @code{stat}, @code{fstat} returns @code{0} on success and @code{-1} +on failure. The following @code{errno} error conditions are defined for +@code{fstat}: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. +@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{fstat64} since the LFS interface transparently +replaces the normal implementation. +@end deftypefun + +@comment sys/stat.h +@comment Unix98 +@deftypefun int fstat64 (int @var{filedes}, struct stat64 *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{fstat} but is able to work on large +files on 32-bit platforms. For large files the file descriptor +@var{filedes} should be obtained by @code{open64} or @code{creat64}. 
+The @var{buf} pointer points to a variable of type @code{struct stat64} +which is able to represent the larger values. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{fstat} and so transparently +replaces the interface for small files on 32-bit machines. +@end deftypefun + +@c fstatat will call alloca and snprintf if the syscall is not +@c available. +@c @safety{@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} + +@comment sys/stat.h +@comment BSD +@deftypefun int lstat (const char *@var{filename}, struct stat *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct system call through lxstat, sometimes with an xstat conv call +@c afterwards. +The @code{lstat} function is like @code{stat}, except that it does not +follow symbolic links. If @var{filename} is the name of a symbolic +link, @code{lstat} returns information about the link itself; otherwise +@code{lstat} works like @code{stat}. @xref{Symbolic Links}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{lstat64} since the LFS interface transparently +replaces the normal implementation. +@end deftypefun + +@comment sys/stat.h +@comment Unix98 +@deftypefun int lstat64 (const char *@var{filename}, struct stat64 *@var{buf}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct system call through lxstat64, sometimes with an xstat conv +@c call afterwards. +This function is similar to @code{lstat} but it is also able to work on +files larger than @twoexp{31} bytes on 32-bit systems. To be able to do +this the result is stored in a variable of type @code{struct stat64} to +which @var{buf} must point. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{lstat} and so transparently +replaces the interface for small files on 32-bit machines. 
+@end deftypefun + +@node Testing File Type +@subsection Testing the Type of a File + +The @dfn{file mode}, stored in the @code{st_mode} field of the file +attributes, contains two kinds of information: the file type code, and +the access permission bits. This section discusses only the type code, +which you can use to tell whether the file is a directory, socket, +symbolic link, and so on. For details about access permissions see +@ref{Permission Bits}. + +There are two ways you can access the file type information in a file +mode. Firstly, for each file type there is a @dfn{predicate macro} +which examines a given file mode and returns whether it is of that type +or not. Secondly, you can mask out the rest of the file mode to leave +just the file type code, and compare this against constants for each of +the supported file types. + +All of the symbols listed in this section are defined in the header file +@file{sys/stat.h}. +@pindex sys/stat.h + +The following predicate macros test the type of a file, given the value +@var{m} which is the @code{st_mode} field returned by @code{stat} on +that file: + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_ISDIR (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a directory. +@end deftypefn + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_ISCHR (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a character special file (a +device like a terminal). +@end deftypefn + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_ISBLK (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a block special file (a device +like a disk). 
+@end deftypefn + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_ISREG (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a regular file. +@end deftypefn + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_ISFIFO (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a FIFO special file, or a +pipe. @xref{Pipes and FIFOs}. +@end deftypefn + +@comment sys/stat.h +@comment GNU +@deftypefn Macro int S_ISLNK (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a symbolic link. +@xref{Symbolic Links}. +@end deftypefn + +@comment sys/stat.h +@comment GNU +@deftypefn Macro int S_ISSOCK (mode_t @var{m}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns non-zero if the file is a socket. @xref{Sockets}. +@end deftypefn + +An alternate non-POSIX method of testing the file type is supported for +compatibility with BSD. The mode can be bitwise AND-ed with +@code{S_IFMT} to extract the file type code, and compared to the +appropriate constant. For example, + +@smallexample +S_ISCHR (@var{mode}) +@end smallexample + +@noindent +is equivalent to: + +@smallexample +((@var{mode} & S_IFMT) == S_IFCHR) +@end smallexample + +@comment sys/stat.h +@comment BSD +@deftypevr Macro int S_IFMT +This is a bit mask used to extract the file type code from a mode value. +@end deftypevr + +These are the symbolic names for the different file type codes: + +@vtable @code +@comment sys/stat.h +@comment BSD +@item S_IFDIR +This is the file type constant of a directory file. + +@comment sys/stat.h +@comment BSD +@item S_IFCHR +This is the file type constant of a character-oriented device file. + +@comment sys/stat.h +@comment BSD +@item S_IFBLK +This is the file type constant of a block-oriented device file. 
+ +@comment sys/stat.h +@comment BSD +@item S_IFREG +This is the file type constant of a regular file. + +@comment sys/stat.h +@comment BSD +@item S_IFLNK +This is the file type constant of a symbolic link. + +@comment sys/stat.h +@comment BSD +@item S_IFSOCK +This is the file type constant of a socket. + +@comment sys/stat.h +@comment BSD +@item S_IFIFO +This is the file type constant of a FIFO or pipe. +@end vtable + +The POSIX.1b standard introduced a few more objects which possibly can +be implemented as objects in the filesystem. These are message queues, +semaphores, and shared memory objects. To allow differentiating these +objects from other files the POSIX standard introduced three new test +macros. But unlike the other macros they do not take the value of the +@code{st_mode} field as the parameter. Instead they expect a pointer to +the whole @code{struct stat} structure. + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_TYPEISMQ (struct stat *@var{s}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If the system implements POSIX message queues as distinct objects and the +file is a message queue object, this macro returns a non-zero value. +In all other cases the result is zero. +@end deftypefn + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_TYPEISSEM (struct stat *@var{s}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If the system implements POSIX semaphores as distinct objects and the +file is a semaphore object, this macro returns a non-zero value. +In all other cases the result is zero. +@end deftypefn + +@comment sys/stat.h +@comment POSIX +@deftypefn Macro int S_TYPEISSHM (struct stat *@var{s}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If the system implements POSIX shared memory objects as distinct objects +and the file is a shared memory object, this macro returns a non-zero +value. In all other cases the result is zero. 
+@end deftypefn + +@node File Owner +@subsection File Owner +@cindex file owner +@cindex owner of a file +@cindex group owner of a file + +Every file has an @dfn{owner} which is one of the registered user names +defined on the system. Each file also has a @dfn{group} which is one of +the defined groups. The file owner can often be useful for showing you +who edited the file (especially when you edit with GNU Emacs), but its +main purpose is for access control. + +The file owner and group play a role in determining access because the +file has one set of access permission bits for the owner, another set +that applies to users who belong to the file's group, and a third set of +bits that applies to everyone else. @xref{Access Permission}, for the +details of how access is decided based on this data. + +When a file is created, its owner is set to the effective user ID of the +process that creates it (@pxref{Process Persona}). The file's group ID +may be set to either the effective group ID of the process, or the group +ID of the directory that contains the file, depending on the system +where the file is stored. When you access a remote file system, it +behaves according to its own rules, not according to the system your +program is running on. Thus, your program must be prepared to encounter +either kind of behavior no matter what kind of system you run it on. + +@pindex chown +@pindex chgrp +You can change the owner and/or group owner of an existing file using +the @code{chown} function. This is the primitive for the @code{chown} +and @code{chgrp} shell commands. + +@pindex unistd.h +The prototype for this function is declared in @file{unistd.h}. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int chown (const char *@var{filename}, uid_t @var{owner}, gid_t @var{group}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{chown} function changes the owner of the file @var{filename} to +@var{owner}, and its group owner to @var{group}. 
+ +Changing the owner of the file on certain systems clears the set-user-ID +and set-group-ID permission bits. (This is because those bits may not +be appropriate for the new owner.) Other file permission bits are not +changed. + +The return value is @code{0} on success and @code{-1} on failure. +In addition to the usual file name errors (@pxref{File Name Errors}), +the following @code{errno} error conditions are defined for this function: + +@table @code +@item EPERM +This process lacks permission to make the requested change. + +Only privileged users or the file's owner can change the file's group. +On most file systems, only privileged users can change the file owner; +some file systems allow you to change the owner if you are currently the +owner. When you access a remote file system, the behavior you encounter +is determined by the system that actually holds the file, not by the +system your program is running on. + +@xref{Options for Files}, for information about the +@code{_POSIX_CHOWN_RESTRICTED} macro. + +@item EROFS +The file is on a read-only file system. +@end table +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun int fchown (int @var{filedes}, uid_t @var{owner}, gid_t @var{group}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is like @code{chown}, except that it changes the owner of the open +file with descriptor @var{filedes}. + +The return value from @code{fchown} is @code{0} on success and @code{-1} +on failure. The following @code{errno} error codes are defined for this +function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINVAL +The @var{filedes} argument corresponds to a pipe or socket, not an ordinary +file. + +@item EPERM +This process lacks permission to make the requested change. For details +see @code{chown} above. + +@item EROFS +The file resides on a read-only file system.
+@end table +@end deftypefun + +@node Permission Bits +@subsection The Mode Bits for Access Permission + +The @dfn{file mode}, stored in the @code{st_mode} field of the file +attributes, contains two kinds of information: the file type code, and +the access permission bits. This section discusses only the access +permission bits, which control who can read or write the file. +@xref{Testing File Type}, for information about the file type code. + +All of the symbols listed in this section are defined in the header file +@file{sys/stat.h}. +@pindex sys/stat.h + +@cindex file permission bits +These symbolic constants are defined for the file mode bits that control +access permission for the file: + +@vtable @code +@comment sys/stat.h +@comment POSIX.1 +@item S_IRUSR +@comment sys/stat.h +@comment BSD +@itemx S_IREAD +Read permission bit for the owner of the file. On many systems this bit +is 0400. @code{S_IREAD} is an obsolete synonym provided for BSD +compatibility. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IWUSR +@comment sys/stat.h +@comment BSD +@itemx S_IWRITE +Write permission bit for the owner of the file. Usually 0200. +@w{@code{S_IWRITE}} is an obsolete synonym provided for BSD compatibility. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IXUSR +@comment sys/stat.h +@comment BSD +@itemx S_IEXEC +Execute (for ordinary files) or search (for directories) permission bit +for the owner of the file. Usually 0100. @code{S_IEXEC} is an obsolete +synonym provided for BSD compatibility. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IRWXU +This is equivalent to @samp{(S_IRUSR | S_IWUSR | S_IXUSR)}. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IRGRP +Read permission bit for the group owner of the file. Usually 040. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IWGRP +Write permission bit for the group owner of the file. Usually 020. 
+ +@comment sys/stat.h +@comment POSIX.1 +@item S_IXGRP +Execute or search permission bit for the group owner of the file. +Usually 010. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IRWXG +This is equivalent to @samp{(S_IRGRP | S_IWGRP | S_IXGRP)}. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IROTH +Read permission bit for other users. Usually 04. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IWOTH +Write permission bit for other users. Usually 02. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IXOTH +Execute or search permission bit for other users. Usually 01. + +@comment sys/stat.h +@comment POSIX.1 +@item S_IRWXO +This is equivalent to @samp{(S_IROTH | S_IWOTH | S_IXOTH)}. + +@comment sys/stat.h +@comment POSIX +@item S_ISUID +This is the set-user-ID on execute bit, usually 04000. +@xref{How Change Persona}. + +@comment sys/stat.h +@comment POSIX +@item S_ISGID +This is the set-group-ID on execute bit, usually 02000. +@xref{How Change Persona}. + +@cindex sticky bit +@comment sys/stat.h +@comment BSD +@item S_ISVTX +This is the @dfn{sticky} bit, usually 01000. + +For a directory it gives permission to delete a file in that directory +only if you own that file. Ordinarily, a user can either delete all the +files in a directory or cannot delete any of them (based on whether the +user has write permission for the directory). The same restriction +applies---you must have both write permission for the directory and own +the file you want to delete. The one exception is that the owner of the +directory can delete any file in the directory, no matter who owns it +(provided the owner has given himself write permission for the +directory). This is commonly used for the @file{/tmp} directory, where +anyone may create files but not delete files created by other users. + +Originally the sticky bit on an executable file modified the swapping +policies of the system. Normally, when a program terminated, its pages +in core were immediately freed and reused. 
If the sticky bit was set on +the executable file, the system kept the pages in core for a while as if +the program were still running. This was advantageous for a program +likely to be run many times in succession. This usage is obsolete in +modern systems. When a program terminates, its pages always remain in +core as long as there is no shortage of memory in the system. When the +program is next run, its pages will still be in core if no shortage +arose since the last run. + +On some modern systems where the sticky bit has no useful meaning for an +executable file, you cannot set the bit at all for a non-directory. +If you try, @code{chmod} fails with @code{EFTYPE}; +@pxref{Setting Permissions}. + +Some systems (particularly SunOS) have yet another use for the sticky +bit. If the sticky bit is set on a file that is @emph{not} executable, +it means the opposite: never cache the pages of this file at all. The +main use of this is for the files on an NFS server machine which are +used as the swap area of diskless client machines. The idea is that the +pages of the file will be cached in the client's memory, so it is a +waste of the server's memory to cache them a second time. With this +usage the sticky bit also implies that the filesystem may fail to record +the file's modification time onto disk reliably (the idea being that +no-one cares for a swap file). + +This bit is only available on BSD systems (and those derived from +them). Therefore one has to use the @code{_GNU_SOURCE} feature select +macro, or not define any feature test macros, to get the definition +(@pxref{Feature Test Macros}). +@end vtable + +The actual bit values of the symbols are listed in the table above +so you can decode file mode values when debugging your programs. +These bit values are correct for most systems, but they are not +guaranteed. + +@strong{Warning:} Writing explicit numbers for file permissions is bad +practice. 
Not only is it not portable, it also requires everyone who +reads your program to remember what the bits mean. To make your program +clean use the symbolic names. + +@node Access Permission +@subsection How Your Access to a File is Decided +@cindex permission to access a file +@cindex access permission for a file +@cindex file access permission + +Recall that the operating system normally decides access permission for +a file based on the effective user and group IDs of the process and its +supplementary group IDs, together with the file's owner, group and +permission bits. These concepts are discussed in detail in @ref{Process +Persona}. + +If the effective user ID of the process matches the owner user ID of the +file, then permissions for read, write, and execute/search are +controlled by the corresponding ``user'' (or ``owner'') bits. Likewise, +if any of the effective group ID or supplementary group IDs of the +process matches the group owner ID of the file, then permissions are +controlled by the ``group'' bits. Otherwise, permissions are controlled +by the ``other'' bits. + +Privileged users, like @samp{root}, can access any file regardless of +its permission bits. As a special case, for a file to be executable +even by a privileged user, at least one of its execute bits must be set. + +@node Setting Permissions +@subsection Assigning File Permissions + +@cindex file creation mask +@cindex umask +The primitive functions for creating files (for example, @code{open} or +@code{mkdir}) take a @var{mode} argument, which specifies the file +permissions to give the newly created file. This mode is modified by +the process's @dfn{file creation mask}, or @dfn{umask}, before it is +used. + +The bits that are set in the file creation mask identify permissions +that are always to be disabled for newly created files. 
For example, if +you set all the ``other'' access bits in the mask, then newly created +files are not accessible at all to processes in the ``other'' category, +even if the @var{mode} argument passed to the create function would +permit such access. In other words, the file creation mask is the +complement of the ordinary access permissions you want to grant. + +Programs that create files typically specify a @var{mode} argument that +includes all the permissions that make sense for the particular file. +For an ordinary file, this is typically read and write permission for +all classes of users. These permissions are then restricted as +specified by the individual user's own file creation mask. + +@findex chmod +To change the permission of an existing file given its name, call +@code{chmod}. This function uses the specified permission bits and +ignores the file creation mask. + +@pindex umask +In normal use, the file creation mask is initialized by the user's login +shell (using the @code{umask} shell command), and inherited by all +subprocesses. Application programs normally don't need to worry about +the file creation mask. It will automatically do what it is supposed to +do. + +When your program needs to create a file and bypass the umask for its +access permissions, the easiest way to do this is to use @code{fchmod} +after opening the file, rather than changing the umask. In fact, +changing the umask is usually done only by shells. They use the +@code{umask} function. + +The functions in this section are declared in @file{sys/stat.h}. +@pindex sys/stat.h + +@comment sys/stat.h +@comment POSIX.1 +@deftypefun mode_t umask (mode_t @var{mask}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{umask} function sets the file creation mask of the current +process to @var{mask}, and returns the previous value of the file +creation mask. 
+ +Here is an example showing how to read the mask with @code{umask} +without changing it permanently: + +@smallexample +mode_t +read_umask (void) +@{ + mode_t mask = umask (0); + umask (mask); + return mask; +@} +@end smallexample + +@noindent +However, on @gnuhurdsystems{} it is better to use @code{getumask} if +you just want to read the mask value, because it is reentrant. +@end deftypefun + +@comment sys/stat.h +@comment GNU +@deftypefun mode_t getumask (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Return the current value of the file creation mask for the current +process. This function is a GNU extension and is only available on +@gnuhurdsystems{}. +@end deftypefun + +@comment sys/stat.h +@comment POSIX.1 +@deftypefun int chmod (const char *@var{filename}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{chmod} function sets the access permission bits for the file +named by @var{filename} to @var{mode}. + +If @var{filename} is a symbolic link, @code{chmod} changes the +permissions of the file pointed to by the link, not those of the link +itself. + +This function returns @code{0} if successful and @code{-1} if not. In +addition to the usual file name errors (@pxref{File Name +Errors}), the following @code{errno} error conditions are defined for +this function: + +@table @code +@item ENOENT +The named file doesn't exist. + +@item EPERM +This process does not have permission to change the access permissions +of this file. Only the file's owner (as judged by the effective user ID +of the process) or a privileged user can change them. + +@item EROFS +The file resides on a read-only file system. + +@item EFTYPE +@var{mode} has the @code{S_ISVTX} bit (the ``sticky bit'') set, +and the named file is not a directory. Some systems do not allow setting the +sticky bit on non-directory files, and some do (and only some of those +assign a useful meaning to the bit for non-directory files). 
+ +You only get @code{EFTYPE} on systems where the sticky bit has no useful +meaning for non-directory files, so it is always safe to just clear the +bit in @var{mode} and call @code{chmod} again. @xref{Permission Bits}, +for full details on the sticky bit. +@end table +@end deftypefun + +@comment sys/stat.h +@comment BSD +@deftypefun int fchmod (int @var{filedes}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is like @code{chmod}, except that it changes the permissions of the +currently open file given by @var{filedes}. + +The return value from @code{fchmod} is @code{0} on success and @code{-1} +on failure. The following @code{errno} error codes are defined for this +function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINVAL +The @var{filedes} argument corresponds to a pipe or socket, or something +else that doesn't really have access permissions. + +@item EPERM +This process does not have permission to change the access permissions +of this file. Only the file's owner (as judged by the effective user ID +of the process) or a privileged user can change them. + +@item EROFS +The file resides on a read-only file system. +@end table +@end deftypefun + +@node Testing File Access +@subsection Testing Permission to Access a File +@cindex testing access permission +@cindex access, testing for +@cindex setuid programs and file access + +In some situations it is desirable to allow programs to access files or +devices even if this is not possible with the permissions granted to the +user. One possible solution is to set the setuid-bit of the program +file. If such a program is started the @emph{effective} user ID of the +process is changed to that of the owner of the program file. So to +allow write access to files like @file{/etc/passwd}, which normally can +be written only by the super-user, the modifying program will have to be +owned by @code{root} and the setuid-bit must be set. 
+ +But besides the files the program is intended to change, the user should +not be allowed to access any file to which s/he would not have access +anyway. The program therefore must explicitly check whether @emph{the +user} would have the necessary access to a file, before it reads or +writes the file. + +To do this, use the function @code{access}, which checks for access +permission based on the process's @emph{real} user ID rather than the +effective user ID. (The setuid feature does not alter the real user ID, +so it reflects the user who actually ran the program.) + +There is another way you could check this access, which is easy to +describe, but very hard to use. This is to examine the file mode bits +and mimic the system's own access computation. This method is +undesirable because many systems have additional access control +features; your program cannot portably mimic them, and you would not +want to try to keep track of the diverse features that different systems +have. Using @code{access} is simple and automatically does whatever is +appropriate for the system you are using. + +@code{access} is @emph{only} appropriate to use in setuid programs. +A non-setuid program will always use the effective ID rather than the +real ID. + +@pindex unistd.h +The symbols in this section are declared in @file{unistd.h}. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int access (const char *@var{filename}, int @var{how}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{access} function checks to see whether the file named by +@var{filename} can be accessed in the way specified by the @var{how} +argument. The @var{how} argument either can be the bitwise OR of the +flags @code{R_OK}, @code{W_OK}, @code{X_OK}, or the existence test +@code{F_OK}. + +This function uses the @emph{real} user and group IDs of the calling +process, rather than the @emph{effective} IDs, to check for access +permission.
As a result, if you use the function from a @code{setuid} +or @code{setgid} program (@pxref{How Change Persona}), it gives +information relative to the user who actually ran the program. + +The return value is @code{0} if the access is permitted, and @code{-1} +otherwise. (In other words, treated as a predicate function, +@code{access} returns true if the requested access is @emph{denied}.) + +In addition to the usual file name errors (@pxref{File Name +Errors}), the following @code{errno} error conditions are defined for +this function: + +@table @code +@item EACCES +The access specified by @var{how} is denied. + +@item ENOENT +The file doesn't exist. + +@item EROFS +Write permission was requested for a file on a read-only file system. +@end table +@end deftypefun + +These macros are defined in the header file @file{unistd.h} for use +as the @var{how} argument to the @code{access} function. The values +are integer constants. +@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int R_OK +Flag meaning test for read permission. +@end deftypevr + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int W_OK +Flag meaning test for write permission. +@end deftypevr + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int X_OK +Flag meaning test for execute/search permission. +@end deftypevr + +@comment unistd.h +@comment POSIX.1 +@deftypevr Macro int F_OK +Flag meaning test for existence of the file. +@end deftypevr + +@node File Times +@subsection File Times + +@cindex file access time +@cindex file modification time +@cindex file attribute modification time +Each file has three time stamps associated with it: its access time, +its modification time, and its attribute modification time. These +correspond to the @code{st_atime}, @code{st_mtime}, and @code{st_ctime} +members of the @code{stat} structure; see @ref{File Attributes}. + +All of these times are represented in calendar time format, as +@code{time_t} objects. 
This data type is defined in @file{time.h}. +For more information about representation and manipulation of time +values, see @ref{Calendar Time}. +@pindex time.h + +Reading from a file updates its access time attribute, and writing +updates its modification time. When a file is created, all three +time stamps for that file are set to the current time. In addition, the +attribute change time and modification time fields of the directory that +contains the new entry are updated. + +Adding a new name for a file with the @code{link} function updates the +attribute change time field of the file being linked, and both the +attribute change time and modification time fields of the directory +containing the new name. These same fields are affected if a file name +is deleted with @code{unlink}, @code{remove} or @code{rmdir}. Renaming +a file with @code{rename} affects only the attribute change time and +modification time fields of the two parent directories involved, and not +the times for the file being renamed. + +Changing the attributes of a file (for example, with @code{chmod}) +updates its attribute change time field. + +You can also change some of the time stamps of a file explicitly using +the @code{utime} function---all except the attribute change time. You +need to include the header file @file{utime.h} to use this facility. +@pindex utime.h + +@comment utime.h +@comment POSIX.1 +@deftp {Data Type} {struct utimbuf} +The @code{utimbuf} structure is used with the @code{utime} function to +specify new access and modification times for a file. It contains the +following members: + +@table @code +@item time_t actime +This is the access time for the file. + +@item time_t modtime +This is the modification time for the file. 
+@end table +@end deftp + +@comment utime.h +@comment POSIX.1 +@deftypefun int utime (const char *@var{filename}, const struct utimbuf *@var{times}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c In the absence of a utime syscall, it non-atomically converts times +@c to a struct timeval and calls utimes. +This function is used to modify the file times associated with the file +named @var{filename}. + +If @var{times} is a null pointer, then the access and modification times +of the file are set to the current time. Otherwise, they are set to the +values from the @code{actime} and @code{modtime} members (respectively) +of the @code{utimbuf} structure pointed to by @var{times}. + +The attribute modification time for the file is set to the current time +in either case (since changing the time stamps is itself a modification +of the file attributes). + +The @code{utime} function returns @code{0} if successful and @code{-1} +on failure. In addition to the usual file name errors +(@pxref{File Name Errors}), the following @code{errno} error conditions +are defined for this function: + +@table @code +@item EACCES +There is a permission problem in the case where a null pointer was +passed as the @var{times} argument. In order to update the time stamp on +the file, you must either be the owner of the file, have write +permission for the file, or be a privileged user. + +@item ENOENT +The file doesn't exist. + +@item EPERM +If the @var{times} argument is not a null pointer, you must either be +the owner of the file or be a privileged user. + +@item EROFS +The file lives on a read-only file system. +@end table +@end deftypefun + +Each of the three time stamps has a corresponding microsecond part, +which extends its resolution. These fields are called +@code{st_atime_usec}, @code{st_mtime_usec}, and @code{st_ctime_usec}; +each has a value between 0 and 999,999, which indicates the time in +microseconds. 
They correspond to the @code{tv_usec} field of a +@code{timeval} structure; see @ref{High-Resolution Calendar}. + +The @code{utimes} function is like @code{utime}, but also lets you specify +the fractional part of the file times. The prototype for this function is +in the header file @file{sys/time.h}. +@pindex sys/time.h + +@comment sys/time.h +@comment BSD +@deftypefun int utimes (const char *@var{filename}, const struct timeval @var{tvp}@t{[2]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c In the absence of a utimes syscall, it non-atomically converts tvp +@c to struct timespec array and issues a utimensat syscall, or to +@c struct utimbuf and calls utime. +This function sets the file access and modification times of the file +@var{filename}. The new file access time is specified by +@code{@var{tvp}[0]}, and the new modification time by +@code{@var{tvp}[1]}. Similar to @code{utime}, if @var{tvp} is a null +pointer then the access and modification times of the file are set to +the current time. This function comes from BSD. + +The return values and error conditions are the same as for the @code{utime} +function. +@end deftypefun + +@comment sys/time.h +@comment BSD +@deftypefun int lutimes (const char *@var{filename}, const struct timeval @var{tvp}@t{[2]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Since there's no lutimes syscall, it non-atomically converts tvp +@c to struct timespec array and issues a utimensat syscall. +This function is like @code{utimes}, except that it does not follow +symbolic links. If @var{filename} is the name of a symbolic link, +@code{lutimes} sets the file access and modification times of the +symbolic link special file itself (as seen by @code{lstat}; +@pxref{Symbolic Links}) while @code{utimes} sets the file access and +modification times of the file the symbolic link refers to. This +function comes from FreeBSD, and is not available on all platforms (if +not available, it will fail with @code{ENOSYS}). 
+ +The return values and error conditions are the same as for the @code{utime} +function. +@end deftypefun + +@comment sys/time.h +@comment BSD +@deftypefun int futimes (int @var{fd}, const struct timeval @var{tvp}@t{[2]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Since there's no futimes syscall, it non-atomically converts tvp +@c to struct timespec array and issues a utimensat syscall, falling back +@c to utimes on a /proc/self/fd symlink. +This function is like @code{utimes}, except that it takes an open file +descriptor as an argument instead of a file name. @xref{Low-Level +I/O}. This function comes from FreeBSD, and is not available on all +platforms (if not available, it will fail with @code{ENOSYS}). + +Like @code{utimes}, @code{futimes} returns @code{0} on success and @code{-1} +on failure. The following @code{errno} error conditions are defined for +@code{futimes}: + +@table @code +@item EACCES +There is a permission problem in the case where a null pointer was +passed as the @var{tvp} argument. In order to update the time stamp on +the file, you must either be the owner of the file, have write +permission for the file, or be a privileged user. + +@item EBADF +The @var{fd} argument is not a valid file descriptor. + +@item EPERM +If the @var{tvp} argument is not a null pointer, you must either be +the owner of the file or be a privileged user. + +@item EROFS +The file lives on a read-only file system. +@end table +@end deftypefun + +@node File Size +@subsection File Size + +Normally file sizes are maintained automatically. A file begins with a +size of @math{0} and is automatically extended when data is written past +its end. It is also possible to empty a file completely by an +@code{open} or @code{fopen} call. + +However, sometimes it is necessary to @emph{reduce} the size of a file. +This can be done with the @code{truncate} and @code{ftruncate} functions. +They were introduced in BSD Unix. @code{ftruncate} was later added to +POSIX.1.
+ +Some systems allow you to extend a file (creating holes) with these +functions. This is useful when using memory-mapped I/O +(@pxref{Memory-mapped I/O}), where files are not automatically extended. +However, it is not portable but must be implemented if @code{mmap} +allows mapping of files (i.e., @code{_POSIX_MAPPED_FILES} is defined). + +Using these functions on anything other than a regular file gives +@emph{undefined} results. On many systems, such a call will appear to +succeed, without actually accomplishing anything. + +@comment unistd.h +@comment X/Open +@deftypefun int truncate (const char *@var{filename}, off_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c In the absence of a truncate syscall, we use open and ftruncate. + +The @code{truncate} function changes the size of @var{filename} to +@var{length}. If @var{length} is shorter than the previous length, data +at the end will be lost. The file must be writable by the user to +perform this operation. + +If @var{length} is longer, holes will be added to the end. However, some +systems do not support this feature and will leave the file unchanged. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{truncate} function is in fact @code{truncate64} and the type +@code{off_t} has 64 bits which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value is @math{0} for success, or @math{-1} for an error. In +addition to the usual file name errors, the following errors may occur: + +@table @code + +@item EACCES +The file is a directory or not writable. + +@item EINVAL +@var{length} is negative. + +@item EFBIG +The operation would extend the file beyond the limits of the operating system. + +@item EIO +A hardware I/O error occurred. + +@item EPERM +The file is "append-only" or "immutable". + +@item EINTR +The operation was interrupted by a signal. 
+ +@end table + +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun int truncate64 (const char *@var{name}, off64_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c In the absence of a syscall, try truncate if length fits. +This function is similar to the @code{truncate} function. The +difference is that the @var{length} argument is 64 bits wide even on 32 +bits machines, which allows the handling of files with sizes up to +@twoexp{63} bytes. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bits machine this function is actually available under the name +@code{truncate} and so transparently replaces the 32 bits interface. +@end deftypefun + +@comment unistd.h +@comment POSIX +@deftypefun int ftruncate (int @var{fd}, off_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This is like @code{truncate}, but it works on a file descriptor @var{fd} +for an opened file instead of a file name to identify the object. The +file must be opened for writing to successfully carry out the operation. + +The POSIX standard leaves it implementation defined what happens if the +specified new @var{length} of the file is bigger than the original size. +The @code{ftruncate} function might simply leave the file alone and do +nothing or it can increase the size to the desired size. In this latter +case the extended area should be zero-filled. So using @code{ftruncate} +is not a reliable way to increase the file size but if it is possible it is +probably the fastest way. The function also operates on POSIX shared +memory segments if these are implemented by the system. + +@code{ftruncate} is especially useful in combination with @code{mmap}. +Since the mapped region must have a fixed size one cannot enlarge the +file by writing something beyond the last mapped page. Instead one has +to enlarge the file itself and then remap the file with the new size. +The example below shows how this works.
+ +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{ftruncate} function is in fact @code{ftruncate64} and the type +@code{off_t} has 64 bits which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value is @math{0} for success, or @math{-1} for an error. The +following errors may occur: + +@table @code + +@item EBADF +@var{fd} does not correspond to an open file. + +@item EACCES +@var{fd} is a directory or not open for writing. + +@item EINVAL +@var{length} is negative. + +@item EFBIG +The operation would extend the file beyond the limits of the operating system. +@c or the open() call -- with the not-yet-discussed feature of opening +@c files with extra-large offsets. + +@item EIO +A hardware I/O error occurred. + +@item EPERM +The file is "append-only" or "immutable". + +@item EINTR +The operation was interrupted by a signal. + +@c ENOENT is also possible on Linux --- however it only occurs if the file +@c descriptor has a `file' structure but no `inode' structure. I'm not +@c sure how such an fd could be created. Perhaps it's a bug. + +@end table + +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun int ftruncate64 (int @var{id}, off64_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c In the absence of a syscall, try ftruncate if length fits. +This function is similar to the @code{ftruncate} function. The +difference is that the @var{length} argument is 64 bits wide even on 32 +bits machines which allows the handling of files with sizes up to +@twoexp{63} bytes. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bits machine this function is actually available under the name +@code{ftruncate} and so transparently replaces the 32 bits interface. 
+@end deftypefun + +As announced here is a little example of how to use @code{ftruncate} in +combination with @code{mmap}: + +@smallexample +int fd; +void *start; +size_t len; + +int +add (off_t at, void *block, size_t size) +@{ + if (at + size > len) + @{ + /* Resize the file and remap. */ + size_t ps = sysconf (_SC_PAGESIZE); + size_t ns = (at + size + ps - 1) & ~(ps - 1); + void *np; + if (ftruncate (fd, ns) < 0) + return -1; + np = mmap (NULL, ns, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (np == MAP_FAILED) + return -1; + start = np; + len = ns; + @} + memcpy ((char *) start + at, block, size); + return 0; +@} +@end smallexample + +The function @code{add} writes a block of memory at an arbitrary +position in the file. If the current size of the file is too small it +is extended. Note that it is extended by a whole number of pages. This +is a requirement of @code{mmap}. The program has to keep track of the +real size, and when it has finished a final @code{ftruncate} call should +set the real size of the file. + +@node Storage Allocation +@subsection Storage Allocation +@cindex allocating file storage +@cindex file allocation +@cindex storage allocating + +@cindex file fragmentation +@cindex fragmentation of files +@cindex sparse files +@cindex files, sparse +Most file systems support allocating large files in a non-contiguous +fashion: the file is split into @emph{fragments} which are allocated +sequentially, but the fragments themselves can be scattered across the +disk. File systems generally try to avoid such fragmentation because it +decreases performance, but if a file gradually increases in size, there +might be no other option than to fragment it. In addition, many file +systems support @emph{sparse files} with @emph{holes}: regions of null +bytes for which no backing storage has been allocated by the file +system. When the holes are finally overwritten with data, fragmentation +can occur as well. 
+ +Explicit allocation of storage for yet-unwritten parts of the file can +help the system to avoid fragmentation. Additionally, if storage +pre-allocation fails, it is possible to report the out-of-disk error +early, often without filling up the entire disk. However, due to +deduplication, copy-on-write semantics, and file compression, such +pre-allocation may not reliably prevent the out-of-disk-space error from +occurring later. Checking for write errors is still required, and +writes to memory-mapped regions created with @code{mmap} can still +result in @code{SIGBUS}. + +@deftypefun int posix_fallocate (int @var{fd}, off_t @var{offset}, off_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c If the file system does not support allocation, +@c @code{posix_fallocate} has a race with file extension (if +@c @var{length} is zero) or with concurrent writes of non-NUL bytes (if +@c @var{length} is positive). + +Allocate backing store for the region of @var{length} bytes starting at +byte @var{offset} in the file for the descriptor @var{fd}. The file +length is increased to @samp{@var{length} + @var{offset}} if necessary. + +@var{fd} must be a regular file opened for writing, or @code{EBADF} is +returned. If there is insufficient disk space to fulfill the allocation +request, @code{ENOSPC} is returned. + +@strong{Note:} If @code{fallocate} is not available (because the file +system does not support it), @code{posix_fallocate} is emulated, which +has the following drawbacks: + +@itemize @bullet +@item +It is very inefficient because all file system blocks in the requested +range need to be examined (even if they have been allocated before) and +potentially rewritten. In contrast, with proper @code{fallocate} +support (see below), the file system can examine the internal file +allocation data structures and eliminate holes directly, maybe even +using unwritten extents (which are pre-allocated but uninitialized on +disk). 
+ +@item +There is a race condition if another thread or process modifies the +underlying file in the to-be-allocated area. Non-null bytes could be +overwritten with null bytes. + +@item +If @var{fd} has been opened with the @code{O_WRONLY} flag, the function +will fail with an @code{errno} value of @code{EBADF}. + +@item +If @var{fd} has been opened with the @code{O_APPEND} flag, the function +will fail with an @code{errno} value of @code{EBADF}. + +@item +If @var{length} is zero, @code{ftruncate} is used to increase the file +size as requested, without allocating file system blocks. There is a +race condition which means that @code{ftruncate} can accidentally +truncate the file if it has been extended concurrently. +@end itemize + +On Linux, if an application does not benefit from emulation or if the +emulation is harmful due to its inherent race conditions, the +application can use the Linux-specific @code{fallocate} function, with a +zero flag argument. For the @code{fallocate} function, @theglibc{} does +not perform allocation emulation if the file system does not support +allocation. Instead, an @code{EOPNOTSUPP} is returned to the caller. + +@end deftypefun + +@deftypefun int posix_fallocate64 (int @var{fd}, off64_t @var{offset}, off64_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This function is a variant of @code{posix_fallocate} which accepts +64-bit file offsets on all platforms. + +@end deftypefun + +@node Making Special Files +@section Making Special Files +@cindex creating special files +@cindex special files + +The @code{mknod} function is the primitive for making special files, +such as files that correspond to devices. @Theglibc{} includes +this function for compatibility with BSD. + +The prototype for @code{mknod} is declared in @file{sys/stat.h}.
+@pindex sys/stat.h + +@comment sys/stat.h +@comment BSD +@deftypefun int mknod (const char *@var{filename}, mode_t @var{mode}, dev_t @var{dev}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Instead of issuing the syscall directly, we go through xmknod. +@c Although the internal xmknod takes a dev_t*, that could lead to +@c @mtsrace races, it's passed a pointer to mknod's dev. +The @code{mknod} function makes a special file with name @var{filename}. +The @var{mode} specifies the mode of the file, and may include the various +special file bits, such as @code{S_IFCHR} (for a character special file) +or @code{S_IFBLK} (for a block special file). @xref{Testing File Type}. + +The @var{dev} argument specifies which device the special file refers to. +Its exact interpretation depends on the kind of special file being created. + +The return value is @code{0} on success and @code{-1} on error. In addition +to the usual file name errors (@pxref{File Name Errors}), the +following @code{errno} error conditions are defined for this function: + +@table @code +@item EPERM +The calling process is not privileged. Only the superuser can create +special files. + +@item ENOSPC +The directory or file system that would contain the new file is full +and cannot be extended. + +@item EROFS +The directory containing the new file can't be modified because it's on +a read-only file system. + +@item EEXIST +There is already a file named @var{filename}. If you want to replace +this file, you must remove the old file explicitly first. +@end table +@end deftypefun + +@node Temporary Files +@section Temporary Files + +If you need to use a temporary file in your program, you can use the +@code{tmpfile} function to open it. Or you can use the @code{tmpnam} +(better: @code{tmpnam_r}) function to provide a name for a temporary +file and then you can open it in the usual way with @code{fopen}. 
+ +The @code{tempnam} function is like @code{tmpnam} but lets you choose +what directory temporary files will go in, and something about what +their file names will look like. Important for multi-threaded programs +is that @code{tempnam} is reentrant, while @code{tmpnam} is not since it +returns a pointer to a static buffer. + +These facilities are declared in the header file @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun {FILE *} tmpfile (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @acsfd{} @aculock{}}} +@c The unsafety issues are those of fdopen, plus @acsfd because of the +@c open. +@c __path_search (internal buf, !dir, const pfx, !try_tmpdir) ok +@c libc_secure_genenv only if try_tmpdir +@c xstat64, strlen, strcmp, sprintf +@c __gen_tempname (internal tmpl, __GT_FILE) ok +@c strlen, memcmp, getpid, open/mkdir/lxstat64 ok +@c HP_TIMING_NOW if available ok +@c gettimeofday (!tz) first time, or every time if no HP_TIMING_NOW ok +@c static value is used and modified without synchronization ok +@c but the use is as a source of non-cryptographic randomness +@c with retries in case of collision, so it should be safe +@c unlink, fdopen +This function creates a temporary binary file for update mode, as if by +calling @code{fopen} with mode @code{"wb+"}. The file is deleted +automatically when it is closed or when the program terminates. (On +some other @w{ISO C} systems the file may fail to be deleted if the program +terminates abnormally). + +This function is reentrant. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is in fact @code{tmpfile64}, i.e., the LFS +interface transparently replaces the old interface. 
+@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun {FILE *} tmpfile64 (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @acsfd{} @aculock{}}} +This function is similar to @code{tmpfile}, but the stream it returns is +opened using the large file support (LFS) interface. Therefore this stream can +be used for files larger than @twoexp{31} bytes on 32-bit machines. + +Please note that the return type is still @code{FILE *}. There is no +special @code{FILE} type for the LFS interface. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit machine this function is available under the name @code{tmpfile} +and so transparently replaces the old interface. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun {char *} tmpnam (char *@var{result}) +@safety{@prelim{}@mtunsafe{@mtasurace{:tmpnam/!result}}@asunsafe{}@acsafe{}} +@c The passed-in buffer should not be modified concurrently with the +@c call. +@c __path_search (static or passed-in buf, !dir, !pfx, !try_tmpdir) ok +@c __gen_tempname (internal tmpl, __GT_NOCREATE) ok +This function constructs and returns a valid file name that does not +refer to any existing file. If the @var{result} argument is a null +pointer, the return value is a pointer to an internal static string, +which might be modified by subsequent calls and therefore makes this +function non-reentrant. Otherwise, the @var{result} argument should be +a pointer to an array of at least @code{L_tmpnam} characters, and the +result is written into that array. + +It is possible for @code{tmpnam} to fail if you call it too many times +without removing previously-created files. This is because the limited +length of the temporary file names gives room for only a finite number +of different names. If @code{tmpnam} fails it returns a null pointer.
+ +@strong{Warning:} Between the time the pathname is constructed and the +file is created another process might have created a file with the same +name using @code{tmpnam}, leading to a possible security hole. The +implementation generates names which can hardly be predicted, but when +opening the file you should use the @code{O_EXCL} flag. Using +@code{tmpfile} or @code{mkstemp} is a safe way to avoid this problem. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun {char *} tmpnam_r (char *@var{result}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is nearly identical to the @code{tmpnam} function, except +that if @var{result} is a null pointer it returns a null pointer. + +This guarantees reentrancy because the non-reentrant situation of +@code{tmpnam} cannot happen here. + +@strong{Warning}: This function has the same security problems as +@code{tmpnam}. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypevr Macro int L_tmpnam +The value of this macro is an integer constant expression that +represents the minimum size of a string large enough to hold a file name +generated by the @code{tmpnam} function. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int TMP_MAX +The macro @code{TMP_MAX} is a lower bound for how many temporary names +you can create with @code{tmpnam}. You can rely on being able to call +@code{tmpnam} at least this many times before it might fail saying you +have made too many temporary file names. + +With @theglibc{}, you can create a very large number of temporary +file names. If you actually created the files, you would probably run +out of disk space before you ran out of names. Some other systems have +a fixed, small limit on the number of temporary files. The limit is +never less than @code{25}. 
+@end deftypevr + +@comment stdio.h +@comment SVID +@deftypefun {char *} tempnam (const char *@var{dir}, const char *@var{prefix}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c There's no way (short of being setuid) to avoid getenv("TMPDIR"), +@c even with a non-NULL dir. +@c +@c __path_search (internal buf, dir, pfx, try_tmpdir) unsafe getenv +@c __gen_tempname (internal tmpl, __GT_NOCREATE) ok +@c strdup +This function generates a unique temporary file name. If @var{prefix} +is not a null pointer, up to five characters of this string are used as +a prefix for the file name. The return value is a string newly +allocated with @code{malloc}, so you should release its storage with +@code{free} when it is no longer needed. + +Because the string is dynamically allocated this function is reentrant. + +The directory prefix for the temporary file name is determined by +testing each of the following in sequence. The directory must exist and +be writable. + +@itemize @bullet +@item +The environment variable @code{TMPDIR}, if it is defined. For security +reasons this only happens if the program is not SUID or SGID enabled. + +@item +The @var{dir} argument, if it is not a null pointer. + +@item +The value of the @code{P_tmpdir} macro. + +@item +The directory @file{/tmp}. +@end itemize + +This function is defined for SVID compatibility. + +@strong{Warning:} Between the time the pathname is constructed and the +file is created another process might have created a file with the same +name using @code{tempnam}, leading to a possible security hole. The +implementation generates names which can hardly be predicted, but when +opening the file you should use the @code{O_EXCL} flag. Using +@code{tmpfile} or @code{mkstemp} is a safe way to avoid this problem. +@end deftypefun +@cindex TMPDIR environment variable + +@c !!! are we putting SVID/GNU/POSIX.1/BSD in here or not?? 
+@comment stdio.h +@comment SVID +@deftypevr {SVID Macro} {char *} P_tmpdir +This macro is the name of the default directory for temporary files. +@end deftypevr + +Older Unix systems did not have the functions just described. Instead +they used @code{mktemp} and @code{mkstemp}. Both of these functions +work by modifying a file name template string you pass. The last six +characters of this string must be @samp{XXXXXX}. These six @samp{X}s +are replaced with six characters which make the whole string a unique +file name. Usually the template string is something like +@samp{/tmp/@var{prefix}XXXXXX}, and each program uses a unique @var{prefix}. + +@strong{NB:} Because @code{mktemp} and @code{mkstemp} modify the +template string, you @emph{must not} pass string constants to them. +String constants are normally in read-only storage, so your program +would crash when @code{mktemp} or @code{mkstemp} tried to modify the +string. These functions are declared in the header file @file{stdlib.h}. +@pindex stdlib.h + +@comment stdlib.h +@comment Unix +@deftypefun {char *} mktemp (char *@var{template}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c __gen_tempname (caller tmpl, __GT_NOCREATE) ok +The @code{mktemp} function generates a unique file name by modifying +@var{template} as described above. If successful, it returns +@var{template} as modified. If @code{mktemp} cannot find a unique file +name, it makes @var{template} an empty string and returns that. If +@var{template} does not end with @samp{XXXXXX}, @code{mktemp} returns a +null pointer. + +@strong{Warning:} Between the time the pathname is constructed and the +file is created another process might have created a file with the same +name using @code{mktemp}, leading to a possible security hole. The +implementation generates names which can hardly be predicted, but when +opening the file you should use the @code{O_EXCL} flag. Using +@code{mkstemp} is a safe way to avoid this problem. 
+@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun int mkstemp (char *@var{template}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +@c __gen_tempname (caller tmpl, __GT_FILE) ok +The @code{mkstemp} function generates a unique file name just as +@code{mktemp} does, but it also opens the file for you with @code{open} +(@pxref{Opening and Closing Files}). If successful, it modifies +@var{template} in place and returns a file descriptor for that file open +for reading and writing. If @code{mkstemp} cannot create a +uniquely-named file, it returns @code{-1}. If @var{template} does not +end with @samp{XXXXXX}, @code{mkstemp} returns @code{-1} and does not +modify @var{template}. + +The file is opened using mode @code{0600}. If the file is meant to be +used by other users this mode must be changed explicitly. +@end deftypefun + +Unlike @code{mktemp}, @code{mkstemp} is actually guaranteed to create a +unique file that cannot possibly clash with any other program trying to +create a temporary file. This is because it works by calling +@code{open} with the @code{O_EXCL} flag, which says you want to create a +new file and get an error if the file already exists. + +@comment stdlib.h +@comment BSD +@deftypefun {char *} mkdtemp (char *@var{template}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c __gen_tempname (caller tmpl, __GT_DIR) ok +The @code{mkdtemp} function creates a directory with a unique name. If +it succeeds, it overwrites @var{template} with the name of the +directory, and returns @var{template}. As with @code{mktemp} and +@code{mkstemp}, @var{template} should be a string ending with +@samp{XXXXXX}. + +If @code{mkdtemp} cannot create a uniquely named directory, it returns +@code{NULL} and sets @code{errno} appropriately. If @var{template} does +not end with @samp{XXXXXX}, @code{mkdtemp} returns @code{NULL} and does +not modify @var{template}. @code{errno} will be set to @code{EINVAL} in +this case.
+ +The directory is created using mode @code{0700}. +@end deftypefun + +The directory created by @code{mkdtemp} cannot clash with temporary +files or directories created by other users. This is because directory +creation always works like @code{open} with @code{O_EXCL}. +@xref{Creating Directories}. + +The @code{mkdtemp} function comes from OpenBSD. + +@c FIXME these are undocumented: +@c faccessat +@c fchmodat +@c fchownat +@c futimesat +@c fstatat (there's a commented-out safety assessment for this one) +@c linkat +@c mkdirat +@c mkfifoat +@c name_to_handle_at +@c openat +@c open_by_handle_at +@c readlinkat +@c renameat +@c scandirat +@c symlinkat +@c unlinkat +@c utimensat +@c mknodat diff --git a/REORG.TODO/manual/freemanuals.texi b/REORG.TODO/manual/freemanuals.texi new file mode 100644 index 0000000000..9caf22017c --- /dev/null +++ b/REORG.TODO/manual/freemanuals.texi @@ -0,0 +1,93 @@ +@c freemanuals.texi - blurb for free documentation. +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@cindex free documentation + +The biggest deficiency in the free software community today is not in +the software---it is the lack of good free documentation that we can +include with the free software. Many of our most important +programs do not come with free reference manuals and free introductory +texts. Documentation is an essential part of any software package; +when an important free software package does not come with a free +manual and a free tutorial, that is a major gap. We have many such +gaps today. + +Consider Perl, for instance. The tutorial manuals that people +normally use are non-free. How did this come about? Because the +authors of those manuals published them with restrictive terms---no +copying, no modification, source files not available---which exclude +them from the free software world. + +That wasn't the first time this sort of thing happened, and it was far +from the last. 
Many times we have heard a GNU user eagerly describe a +manual that he is writing, his intended contribution to the community, +only to learn that he had ruined everything by signing a publication +contract to make it non-free. + +Free documentation, like free software, is a matter of freedom, not +price. The problem with the non-free manual is not that publishers +charge a price for printed copies---that in itself is fine. (The Free +Software Foundation sells printed copies of manuals, too.) The +problem is the restrictions on the use of the manual. Free manuals +are available in source code form, and give you permission to copy and +modify. Non-free manuals do not allow this. + +The criteria of freedom for a free manual are roughly the same as for +free software. Redistribution (including the normal kinds of +commercial redistribution) must be permitted, so that the manual can +accompany every copy of the program, both on-line and on paper. + +Permission for modification of the technical content is crucial too. +When people modify the software, adding or changing features, if they +are conscientious they will change the manual too---so they can +provide accurate and clear documentation for the modified program. A +manual that leaves you no choice but to write a new manual to document +a changed version of the program is not really available to our +community. + +Some kinds of limits on the way modification is handled are +acceptable. For example, requirements to preserve the original +author's copyright notice, the distribution terms, or the list of +authors, are ok. It is also no problem to require modified versions +to include notice that they were modified. Even entire sections that +may not be deleted or changed are acceptable, as long as they deal +with nontechnical topics (like this one). These kinds of restrictions +are acceptable because they don't obstruct the community's normal use +of the manual. 
+ +However, it must be possible to modify all the @emph{technical} +content of the manual, and then distribute the result in all the usual +media, through all the usual channels. Otherwise, the restrictions +obstruct the use of the manual, it is not free, and we need another +manual to replace it. + +Please spread the word about this issue. Our community continues to +lose manuals to proprietary publishing. If we spread the word that +free software needs free reference manuals and free tutorials, perhaps +the next person who wants to contribute by writing documentation will +realize, before it is too late, that only free manuals contribute to +the free software community. + +If you are writing documentation, please insist on publishing it under +the GNU Free Documentation License or another free documentation +license. Remember that this decision requires your approval---you +don't have to let the publisher decide. Some commercial publishers +will use a free license if you insist, but they will not propose the +option; it is up to you to raise the issue and say firmly that this is +what you want. If the publisher you are dealing with refuses, please +try other publishers. If you're not sure whether a proposed license +is free, write to @email{licensing@@gnu.org}. + +You can encourage commercial publishers to sell more free, copylefted +manuals and tutorials by buying them, and particularly by buying +copies from the publishers that paid for their writing or for major +improvements. Meanwhile, try to avoid buying non-free documentation +at all. Check the distribution terms of a manual before you buy it, +and insist that whoever seeks your business must respect your freedom. +Check the history of the book, and try to reward the publishers that have +paid or pay the authors to work on it. + +The Free Software Foundation maintains a list of free documentation +published by other publishers, at +@url{http://www.fsf.org/doc/other-free-books.html}.
diff --git a/REORG.TODO/manual/getopt.texi b/REORG.TODO/manual/getopt.texi new file mode 100644 index 0000000000..a71c3731aa --- /dev/null +++ b/REORG.TODO/manual/getopt.texi @@ -0,0 +1,330 @@ +@node Getopt, Argp, , Parsing Program Arguments +@section Parsing program options using @code{getopt} + +The @code{getopt} and @code{getopt_long} functions automate some of the +chore involved in parsing typical Unix command line options. + +@menu +* Using Getopt:: Using the @code{getopt} function. +* Example of Getopt:: An example of parsing options with @code{getopt}. +* Getopt Long Options:: GNU suggests utilities accept long-named + options; here is one way to do it. +* Getopt Long Option Example:: An example of using @code{getopt_long}. +@end menu + +@node Using Getopt, Example of Getopt, , Getopt +@subsection Using the @code{getopt} function + +Here are the details about how to call the @code{getopt} function. To +use this facility, your program must include the header file +@file{unistd.h}. +@pindex unistd.h + +@comment unistd.h +@comment POSIX.2 +@deftypevar int opterr +If the value of this variable is nonzero, then @code{getopt} prints an +error message to the standard error stream if it encounters an unknown +option character or an option with a missing required argument. This is +the default behavior. If you set this variable to zero, @code{getopt} +does not print any messages, but it still returns the character @code{?} +to indicate an error. +@end deftypevar + +@comment unistd.h +@comment POSIX.2 +@deftypevar int optopt +When @code{getopt} encounters an unknown option character or an option +with a missing required argument, it stores that option character in +this variable. You can use this for providing your own diagnostic +messages. +@end deftypevar + +@comment unistd.h +@comment POSIX.2 +@deftypevar int optind +This variable is set by @code{getopt} to the index of the next element +of the @var{argv} array to be processed.
Once @code{getopt} has found +all of the option arguments, you can use this variable to determine +where the remaining non-option arguments begin. The initial value of +this variable is @code{1}. +@end deftypevar + +@comment unistd.h +@comment POSIX.2 +@deftypevar {char *} optarg +This variable is set by @code{getopt} to point at the value of the +option argument, for those options that accept arguments. +@end deftypevar + +@comment unistd.h +@comment POSIX.2 +@deftypefun int getopt (int @var{argc}, char *const *@var{argv}, const char *@var{options}) +@safety{@prelim{}@mtunsafe{@mtasurace{:getopt} @mtsenv{}}@asunsafe{@ascuheap{} @ascuintl{} @asulock{} @asucorrupt{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c Swapping elements of passed-in argv may be partial in case of +@c cancellation. Gettext brings about a whole lot of AS and AC safety +@c issues. The getopt API involves returning values in the +@c non-thread-specific optarg variable, which adds another thread-safety +@c issue. Given print_errors, it may output errors to stderr, which may +@c self-deadlock, leak locks, or encounter (in a signal handler) or +@c leave (in case of cancellation) stderr in an inconsistent state. +@c Various implicit, indirect uses of malloc, in uses of memstream and +@c asprintf for error-printing, bring about the usual malloc issues. +@c +@c _getopt_internal +@c _getopt_internal_r +@c gettext +@c _getopt_initialize +@c getenv +@c open_memstream +@c lockfile, unlockfile, __fxprintf -> stderr +@c asprintf +The @code{getopt} function gets the next option argument from the +argument list specified by the @var{argv} and @var{argc} arguments. +Normally these values come directly from the arguments received by +@code{main}. + +The @var{options} argument is a string that specifies the option +characters that are valid for this program. An option character in this +string can be followed by a colon (@samp{:}) to indicate that it takes a +required argument. 
+If an option character is followed by two colons +(@samp{::}), its argument is optional; this is a GNU extension. + +@code{getopt} has three ways to deal with options that follow +non-option @var{argv} elements. The special argument @samp{--} forces +the end of option scanning in all cases. + +@itemize @bullet +@item +The default is to permute the contents of @var{argv} while scanning it +so that eventually all the non-options are at the end. This allows +options to be given in any order, even with programs that were not +written to expect this. + +@item +If the @var{options} argument string begins with a hyphen (@samp{-}), this +is treated specially. It permits arguments that are not options to be +returned as if they were associated with option character @samp{\1}. + +@item +POSIX demands the following behavior: the first non-option stops option +processing. This mode is selected by either setting the environment +variable @code{POSIXLY_CORRECT} or beginning the @var{options} argument +string with a plus sign (@samp{+}). +@end itemize + +The @code{getopt} function returns the option character for the next +command line option. When no more option arguments are available, it +returns @code{-1}. There may still be more non-option arguments; you +must compare the external variable @code{optind} against the @var{argc} +parameter to check this. + +If the option has an argument, @code{getopt} returns the argument by +storing it in the variable @code{optarg}. You don't ordinarily need to +copy the @code{optarg} string, since it is a pointer into the original +@var{argv} array, not into a static area that might be overwritten. + +If @code{getopt} finds an option character in @var{argv} that was not +included in @var{options}, or a missing option argument, it returns +@samp{?} and sets the external variable @code{optopt} to the actual +option character.
If the first character of @var{options} is a colon +(@samp{:}), then @code{getopt} returns @samp{:} instead of @samp{?} to +indicate a missing option argument. In addition, if the external +variable @code{opterr} is nonzero (which is the default), @code{getopt} +prints an error message. +@end deftypefun + +@node Example of Getopt +@subsection Example of Parsing Arguments with @code{getopt} + +Here is an example showing how @code{getopt} is typically used. The +key points to notice are: + +@itemize @bullet +@item +Normally, @code{getopt} is called in a loop. When @code{getopt} returns +@code{-1}, indicating no more options are present, the loop terminates. + +@item +A @code{switch} statement is used to dispatch on the return value from +@code{getopt}. In typical use, each case just sets a variable that +is used later in the program. + +@item +A second loop is used to process the remaining non-option arguments. +@end itemize + +@smallexample +@include testopt.c.texi +@end smallexample + +Here are some examples showing what this program prints with different +combinations of arguments: + +@smallexample +% testopt +aflag = 0, bflag = 0, cvalue = (null) + +% testopt -a -b +aflag = 1, bflag = 1, cvalue = (null) + +% testopt -ab +aflag = 1, bflag = 1, cvalue = (null) + +% testopt -c foo +aflag = 0, bflag = 0, cvalue = foo + +% testopt -cfoo +aflag = 0, bflag = 0, cvalue = foo + +% testopt arg1 +aflag = 0, bflag = 0, cvalue = (null) +Non-option argument arg1 + +% testopt -a arg1 +aflag = 1, bflag = 0, cvalue = (null) +Non-option argument arg1 + +% testopt -c foo arg1 +aflag = 0, bflag = 0, cvalue = foo +Non-option argument arg1 + +% testopt -a -- -b +aflag = 1, bflag = 0, cvalue = (null) +Non-option argument -b + +% testopt -a - +aflag = 1, bflag = 0, cvalue = (null) +Non-option argument - +@end smallexample + +@node Getopt Long Options +@subsection Parsing Long Options with @code{getopt_long} + +To accept GNU-style long options as well as single-character options, +use 
@code{getopt_long} instead of @code{getopt}. This function is +declared in @file{getopt.h}, not @file{unistd.h}. You should make every +program accept long options if it uses any options, for this takes +little extra work and helps beginners remember how to use the program. + +@comment getopt.h +@comment GNU +@deftp {Data Type} {struct option} +This structure describes a single long option name for the sake of +@code{getopt_long}. The argument @var{longopts} must be an array of +these structures, one for each long option. Terminate the array with an +element containing all zeros. + +The @code{struct option} structure has these fields: + +@table @code +@item const char *name +This field is the name of the option. It is a string. + +@item int has_arg +This field says whether the option takes an argument. It is an integer, +and there are three legitimate values: @w{@code{no_argument}}, +@code{required_argument} and @code{optional_argument}. + +@item int *flag +@itemx int val +These fields control how to report or act on the option when it occurs. + +If @code{flag} is a null pointer, then the @code{val} is a value which +identifies this option. Often these values are chosen to uniquely +identify particular long options. + +If @code{flag} is not a null pointer, it should be the address of an +@code{int} variable which is the flag for this option. The value in +@code{val} is the value to store in the flag to indicate that the option +was seen. +@end table +@end deftp + +@comment getopt.h +@comment GNU +@deftypefun int getopt_long (int @var{argc}, char *const *@var{argv}, const char *@var{shortopts}, const struct option *@var{longopts}, int *@var{indexptr}) +@safety{@prelim{}@mtunsafe{@mtasurace{:getopt} @mtsenv{}}@asunsafe{@ascuheap{} @ascuintl{} @asulock{} @asucorrupt{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c Same issues as getopt. +Decode options from the vector @var{argv} (whose length is @var{argc}). 
+The argument @var{shortopts} describes the short options to accept, just as +it does in @code{getopt}. The argument @var{longopts} describes the long +options to accept (see above). + +When @code{getopt_long} encounters a short option, it does the same +thing that @code{getopt} would do: it returns the character code for the +option, and stores the option's argument (if it has one) in @code{optarg}. + +When @code{getopt_long} encounters a long option, it takes actions based +on the @code{flag} and @code{val} fields of the definition of that +option. + +If @code{flag} is a null pointer, then @code{getopt_long} returns the +contents of @code{val} to indicate which option it found. You should +arrange distinct values in the @code{val} field for options with +different meanings, so you can decode these values after +@code{getopt_long} returns. If the long option is equivalent to a short +option, you can use the short option's character code in @code{val}. + +If @code{flag} is not a null pointer, that means this option should just +set a flag in the program. The flag is a variable of type @code{int} +that you define. Put the address of the flag in the @code{flag} field. +Put in the @code{val} field the value you would like this option to +store in the flag. In this case, @code{getopt_long} returns @code{0}. + +For any long option, @code{getopt_long} tells you the index in the array +@var{longopts} of the option's definition, by storing it into +@code{*@var{indexptr}}. You can get the name of the option with +@code{@var{longopts}[*@var{indexptr}].name}. So you can distinguish among +long options either by the values in their @code{val} fields or by their +indices. You can also distinguish in this way among long options that +set flags. + +When a long option has an argument, @code{getopt_long} puts the argument +value in the variable @code{optarg} before returning. When the option +has no argument, the value in @code{optarg} is a null pointer.
This is +how you can tell whether an optional argument was supplied. + +When @code{getopt_long} has no more options to handle, it returns +@code{-1}, and leaves in the variable @code{optind} the index in +@var{argv} of the next remaining argument. +@end deftypefun + +Since long option names were used before @code{getopt_long} +was invented, there are program interfaces which require programs +to recognize options like @w{@samp{-option value}} instead of +@w{@samp{--option value}}. To enable these programs to use the GNU +getopt functionality there is one more function available. + +@comment getopt.h +@comment GNU +@deftypefun int getopt_long_only (int @var{argc}, char *const *@var{argv}, const char *@var{shortopts}, const struct option *@var{longopts}, int *@var{indexptr}) +@safety{@prelim{}@mtunsafe{@mtasurace{:getopt} @mtsenv{}}@asunsafe{@ascuheap{} @ascuintl{} @asulock{} @asucorrupt{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c Same issues as getopt. + +The @code{getopt_long_only} function is equivalent to the +@code{getopt_long} function but it allows the user of the +application to pass long options with only @samp{-} instead of +@samp{--}. The @samp{--} prefix is still recognized, but when a +@samp{-} is seen the function does not look through the short options +right away; it first checks whether the parameter names a long option. +If not, it is parsed as a short option. + +Assuming @code{getopt_long_only} is used, starting an application with + +@smallexample + app -foo +@end smallexample + +@noindent +makes @code{getopt_long_only} first look for a long option named +@samp{foo}. If this is not found, the short options @samp{f}, @samp{o}, +and again @samp{o} are recognized.
+@end deftypefun + +@node Getopt Long Option Example +@subsection Example of Parsing Long Options with @code{getopt_long} + +@smallexample +@include longopt.c.texi +@end smallexample diff --git a/REORG.TODO/manual/header.texi b/REORG.TODO/manual/header.texi new file mode 100644 index 0000000000..2a551cd6e1 --- /dev/null +++ b/REORG.TODO/manual/header.texi @@ -0,0 +1,22 @@ +@node Library Summary, Installation, Language Features, Top +@c %MENU% A summary showing the syntax, header file, and derivation of each library feature +@appendix Summary of Library Facilities + +This appendix is a complete list of the facilities declared within the +header files supplied with @theglibc{}. Each entry also lists the +standard or other source from which each facility is derived, and tells +you where in the manual you can find more information about how to use +it. + +@c This table runs wide. Shrink fonts. +@iftex +@smallfonts @rm +@end iftex +@table @code +@comment summary.texi is generated from the other Texinfo files. +@comment See the Makefile and summary.awk for the details. +@include summary.texi +@end table +@iftex +@textfonts @rm +@end iftex diff --git a/REORG.TODO/manual/install-plain.texi b/REORG.TODO/manual/install-plain.texi new file mode 100644 index 0000000000..c5179e780c --- /dev/null +++ b/REORG.TODO/manual/install-plain.texi @@ -0,0 +1,5 @@ +@c This is for making the `INSTALL' file for the distribution. +@c Makeinfo ignores it when processing the file from the include. 
+@setfilename INSTALL +@set plain +@include install.texi diff --git a/REORG.TODO/manual/install.texi b/REORG.TODO/manual/install.texi new file mode 100644 index 0000000000..d39d2daacd --- /dev/null +++ b/REORG.TODO/manual/install.texi @@ -0,0 +1,655 @@ +@include macros.texi +@include pkgvers.texi + +@ifclear plain +@node Installation, Maintenance, Library Summary, Top +@end ifclear + +@c %MENU% How to install the GNU C Library +@appendix Installing @theglibc{} + +Before you do anything else, you should read the FAQ at +@url{http://sourceware.org/glibc/wiki/FAQ}. It answers common +questions and describes problems you may experience with compilation +and installation. + +Features can be added to @theglibc{} via @dfn{add-on} bundles. These are +separate tar files, which you unpack into the top level of the source +tree. Then you give @code{configure} the @samp{--enable-add-ons} option +to activate them, and they will be compiled into the library. + +You will need recent versions of several GNU tools: definitely GCC and +GNU Make, and possibly others. @xref{Tools for Compilation}, below. + +@ifclear plain +@menu +* Configuring and compiling:: How to compile and test GNU libc. +* Running make install:: How to install it once you've got it + compiled. +* Tools for Compilation:: You'll need these first. +* Linux:: Specific advice for GNU/Linux systems. +* Reporting Bugs:: So they'll get fixed. +@end menu +@end ifclear + +@node Configuring and compiling +@appendixsec Configuring and compiling @theglibc{} +@cindex configuring +@cindex compiling + +@Theglibc{} cannot be compiled in the source directory. You must build +it in a separate build directory. For example, if you have unpacked +the @glibcadj{} sources in @file{/src/gnu/glibc-@var{version}}, +create a directory +@file{/src/gnu/glibc-build} to put the object files in. This allows +removing the whole build directory in case an error occurs, which is +the safest way to get a fresh start and should always be done. 
+ +From your object directory, run the shell script @file{configure} located +at the top level of the source tree. In the scenario above, you'd type + +@smallexample +$ ../glibc-@var{version}/configure @var{args@dots{}} +@end smallexample + +Please note that even though you're building in a separate build +directory, the compilation may need to create or modify files and +directories in the source directory. + +@noindent +@code{configure} takes many options, but the only one that is usually +mandatory is @samp{--prefix}. This option tells @code{configure} +where you want @theglibc{} installed. This defaults to @file{/usr/local}, +but the normal setting to install as the standard system library is +@samp{--prefix=/usr} for @gnulinuxsystems{} and @samp{--prefix=} (an +empty prefix) for @gnuhurdsystems{}. + +It may also be useful to set the @var{CC} and @var{CFLAGS} variables in +the environment when running @code{configure}. @var{CC} selects the C +compiler that will be used, and @var{CFLAGS} sets optimization options +for the compiler. + +The following list describes all of the available options for + @code{configure}: + +@table @samp +@item --prefix=@var{directory} +Install machine-independent data files in subdirectories of +@file{@var{directory}}. The default is to install in @file{/usr/local}. + +@item --exec-prefix=@var{directory} +Install the library and other machine-dependent files in subdirectories +of @file{@var{directory}}. The default is to the @samp{--prefix} +directory if that option is specified, or @file{/usr/local} otherwise. + +@item --with-headers=@var{directory} +Look for kernel header files in @var{directory}, not +@file{/usr/include}. @Theglibc{} needs information from the kernel's header +files describing the interface to the kernel. @Theglibc{} will normally +look in @file{/usr/include} for them, +but if you specify this option, it will look in @var{directory} instead.
+ +This option is primarily of use on a system where the headers in +@file{/usr/include} come from an older version of @theglibc{}. Conflicts can +occasionally happen in this case. You can also use this option if you want to +compile @theglibc{} with a newer set of kernel headers than the ones found in +@file{/usr/include}. + +@item --enable-add-ons[=@var{list}] +Specify add-on packages to include in the build. If this option is +specified with no list, it enables all the add-on packages it finds in +the main source directory; this is the default behavior. You may +specify an explicit list of add-ons to use in @var{list}, separated by +spaces or commas (if you use spaces, remember to quote them from the +shell). Each add-on in @var{list} can be an absolute directory name +or can be a directory name relative to the main source directory, or +relative to the build directory (that is, the current working directory). +For example, @samp{--enable-add-ons=nptl,../glibc-libidn-@var{version}}. + +@item --enable-kernel=@var{version} +This option is currently only useful on @gnulinuxsystems{}. The +@var{version} parameter should have the form X.Y.Z and describes the +smallest version of the Linux kernel the generated library is expected +to support. The higher the @var{version} number is, the less +compatibility code is added, and the faster the code gets. + +@item --with-binutils=@var{directory} +Use the binutils (assembler and linker) in @file{@var{directory}}, not +the ones the C compiler would default to. You can use this option if +the default binutils on your system cannot deal with all the constructs +in @theglibc{}. In that case, @code{configure} will detect the +problem and suppress these constructs, so that the library will still be +usable, but functionality may be lost---for example, you can't build a +shared libc with old binutils. 
+ +@item --without-fp +Use this option if your computer lacks hardware floating-point support +and your operating system does not emulate an FPU. + +@c disable static doesn't work currently +@c @item --disable-static +@c Don't build static libraries. Static libraries aren't that useful these +@c days, but we recommend you build them in case you need them. + +@item --disable-shared +Don't build shared libraries even if it is possible. Not all systems +support shared libraries; you need ELF support and (currently) the GNU +linker. + +@item --disable-profile +Don't build libraries with profiling information. You may want to use +this option if you don't plan to do profiling. + +@item --enable-static-nss +Compile static versions of the NSS (Name Service Switch) libraries. +This is not recommended because it defeats the purpose of NSS; a program +linked statically with the NSS libraries cannot be dynamically +reconfigured to use a different name database. + +@item --enable-hardcoded-path-in-tests +By default, dynamic tests are linked to run with the installed C library. +This option hardcodes the newly built C library path in dynamic tests +so that they can be invoked directly. + +@item --disable-timezone-tools +By default, timezone related utilities (@command{zic}, @command{zdump}, +and @command{tzselect}) are installed with @theglibc{}. If you are building +these independently (e.g. by using the @samp{tzcode} package), then this +option will allow disabling the install of these. + +Note that you need to make sure the external tools are kept in sync with +the versions that @theglibc{} expects as the data formats may change over +time. Consult the @file{timezone} subdirectory for more details. + +@item --enable-lock-elision=yes +Enable lock elision for pthread mutexes by default. 
+ +@item --enable-stack-protector +@itemx --enable-stack-protector=strong +@itemx --enable-stack-protector=all +Compile the C library and all other parts of the glibc package +(including the threading and math libraries, NSS modules, and +transliteration modules) using the GCC @option{-fstack-protector}, +@option{-fstack-protector-strong} or @option{-fstack-protector-all} +options to detect stack overruns. Only the dynamic linker and a small +number of routines called directly from assembler are excluded from this +protection. + +@item --enable-bind-now +Disable lazy binding for installed shared objects. This provides +additional security hardening because it enables full RELRO and a +read-only global offset table (GOT), at the cost of slightly increased +program load times. + +@pindex pt_chown +@findex grantpt +@item --enable-pt_chown +The file @file{pt_chown} is a helper binary for @code{grantpt} +(@pxref{Allocation, Pseudo-Terminals}) that is installed setuid root to +fix up pseudo-terminal ownership. It is not built by default because +systems using the Linux kernel are commonly built with the @code{devpts} +filesystem enabled and mounted at @file{/dev/pts}, which manages +pseudo-terminal ownership automatically. By using +@samp{--enable-pt_chown}, you may build @file{pt_chown} and install it +setuid and owned by @code{root}. The use of @file{pt_chown} introduces +additional security risks to the system and you should enable it only if +you understand and accept those risks. + +@item --disable-werror +By default, @theglibc{} is built with @option{-Werror}. If you wish +to build without this option (for example, if building with a newer +version of GCC than this version of @theglibc{} was tested with, so +new warnings cause the build with @option{-Werror} to fail), you can +configure with @option{--disable-werror}. + +@item --disable-mathvec +By default for x86_64, @theglibc{} is built with the vector math library. 
+Use this option to disable the vector math library. + +@item --enable-tunables +Tunables support allows additional library parameters to be customized at +runtime. This is an experimental feature and affects startup time and is thus +disabled by default. This option can take the following values: + +@table @code +@item no +This is the default if the option is not passed to configure. This disables +tunables. + +@item yes +This is the default if the option is passed to configure. This enables tunables +and selects the default frontend (currently @samp{valstring}). + +@item valstring +This enables tunables and selects the @samp{valstring} frontend for tunables. +This frontend allows users to specify tunables as a colon-separated list in a +single environment variable @env{GLIBC_TUNABLES}. +@end table + +@item --enable-obsolete-nsl +By default, libnsl is only built as a shared library for backward +compatibility and the NSS modules libnss_compat, libnss_nis and +libnss_nisplus are not built at all. +Use this option to enable libnsl with all depending NSS modules and +header files. + +@item --build=@var{build-system} +@itemx --host=@var{host-system} +These options are for cross-compiling. If you specify both options and +@var{build-system} is different from @var{host-system}, @code{configure} +will prepare to cross-compile @theglibc{} from @var{build-system} to be used +on @var{host-system}. You'll probably need the @samp{--with-headers} +option too, and you may have to override @code{configure}'s selection of +the compiler and/or binutils. + +If you only specify @samp{--host}, @code{configure} will prepare for a +native compile but use what you specify instead of guessing what your +system is. This is most useful to change the CPU submodel.
For example, +if @code{configure} guesses your machine as @code{i686-pc-linux-gnu} but +you want to compile a library for 586es, give +@samp{--host=i586-pc-linux-gnu} or just @samp{--host=i586-linux} and add +the appropriate compiler flags (@samp{-mcpu=i586} will do the trick) to +@var{CFLAGS}. + +If you specify just @samp{--build}, @code{configure} will get confused. + +@item --with-pkgversion=@var{version} +Specify a description, possibly including a build number or build +date, of the binaries being built, to be included in +@option{--version} output from programs installed with @theglibc{}. +For example, @option{--with-pkgversion='FooBar GNU/Linux glibc build +123'}. The default value is @samp{GNU libc}. + +@item --with-bugurl=@var{url} +Specify the URL that users should visit if they wish to report a bug, +to be included in @option{--help} output from programs installed with +@theglibc{}. The default value refers to the main bug-reporting +information for @theglibc{}. +@end table + +To build the library and related programs, type @code{make}. This will +produce a lot of output, some of which may look like errors from +@code{make} but aren't. Look for error messages from @code{make} +containing @samp{***}. Those indicate that something is seriously wrong. + +The compilation process can take a long time, depending on the +configuration and the speed of your machine. Some complex modules may +take a very long time to compile, as much as several minutes on slower +machines. Do not panic if the compiler appears to hang. + +If you want to run a parallel make, simply pass the @samp{-j} option +with an appropriate numeric parameter to @code{make}. You need a recent +GNU @code{make} version, though. + +To build and run test programs which exercise some of the library +facilities, type @code{make check}. If it does not complete +successfully, do not use the built library, and report a bug after +verifying that the problem is not already known. 
@xref{Reporting Bugs}, +for instructions on reporting bugs. Note that some of the tests assume +they are not being run by @code{root}. We recommend you compile and +test @theglibc{} as an unprivileged user. + +Before reporting bugs make sure there is no problem with your system. +The tests (and later installation) use some pre-existing files of the +system such as @file{/etc/passwd}, @file{/etc/nsswitch.conf} and others. +These files must all contain correct and sensible content. + +Normally, @code{make check} will run all the tests before reporting +all problems found and exiting with error status if any problems +occurred. You can specify @samp{stop-on-test-failure=y} when running +@code{make check} to make the test run stop and exit with an error +status immediately when a failure occurs. + +The @glibcadj{} pretty printers come with their own set of scripts for testing, +which run together with the rest of the testsuite through @code{make check}. +These scripts require the following tools to run successfully: + +@itemize @bullet +@item +Python 2.7.6/3.4.3 or later + +Python is required for running the printers' test scripts. + +@item PExpect 4.0 + +The printer tests drive GDB through test programs and compare its output +to the printers'. PExpect is used to capture the output of GDB, and should be +compatible with the Python version in your system. + +@item +GDB 7.8 or later with support for Python 2.7.6/3.4.3 or later + +GDB itself needs to be configured with Python support in order to use the +pretty printers. Notice that your system having Python available doesn't imply +that GDB supports it, nor that your system's Python and GDB's have the same +version. +@end itemize + +@noindent +If these tools are absent, the printer tests will report themselves as +@code{UNSUPPORTED}. Notice that some of the printer tests require @theglibc{} +to be compiled with debugging symbols. 
+ +To format the @cite{GNU C Library Reference Manual} for printing, type +@w{@code{make dvi}}. You need a working @TeX{} installation to do +this. The distribution builds the on-line formatted version of the +manual, as Info files, as part of the build process. You can build +them manually with @w{@code{make info}}. + +The library has a number of special-purpose configuration parameters +which you can find in @file{Makeconfig}. These can be overridden by +the file @file{configparms}. To change them, create a +@file{configparms} in your build directory and add values as appropriate +for your system. The file is included and parsed by @code{make} and has +to follow the conventions for makefiles. + +It is easy to configure @theglibc{} for cross-compilation by +setting a few variables in @file{configparms}. Set @code{CC} to the +cross-compiler for the target you configured the library for; it is +important to use this same @code{CC} value when running +@code{configure}, like this: @samp{CC=@var{target}-gcc configure +@var{target}}. Set @code{BUILD_CC} to the compiler to use for programs +run on the build system as part of compiling the library. You may need to +set @code{AR} to a cross-compiling version of @code{ar} +if the native tool is not configured to work with +object files for the target you configured for. When cross-compiling +@theglibc{}, it may be tested using @samp{make check +test-wrapper="@var{srcdir}/scripts/cross-test-ssh.sh @var{hostname}"}, +where @var{srcdir} is the absolute directory name for the main source +directory and @var{hostname} is the host name of a system that can run +the newly built binaries of @theglibc{}. The source and build +directories must be visible at the same locations on both the build +system and @var{hostname}. + +In general, when testing @theglibc{}, @samp{test-wrapper} may be set +to the name and arguments of any program to run newly built binaries.
+This program must preserve the arguments to the binary being run, its +working directory and the standard input, output and error file +descriptors. If @samp{@var{test-wrapper} env} will not work to run a +program with environment variables set, then @samp{test-wrapper-env} +must be set to a program that runs a newly built program with +environment variable assignments in effect, those assignments being +specified as @samp{@var{var}=@var{value}} before the name of the +program to be run. If multiple assignments to the same variable are +specified, the last assignment specified must take precedence. +Similarly, if @samp{@var{test-wrapper} env -i} will not work to run a +program with an environment completely empty of variables except those +directly assigned, then @samp{test-wrapper-env-only} must be set; its +use has the same syntax as @samp{test-wrapper-env}, the only +difference in its semantics being starting with an empty set of +environment variables rather than the ambient set. + + +@node Running make install +@appendixsec Installing the C Library +@cindex installing + +To install the library and its header files, and the Info files of the +manual, type @code{make install}. This will +build things, if necessary, before installing them; however, you should +still compile everything first. If you are installing @theglibc{} as your +primary C library, we recommend that you shut the system down to +single-user mode first, and reboot afterward. This minimizes the risk +of breaking things when the library changes out from underneath. + +@samp{make install} will do the entire job of upgrading from a +previous installation of @theglibc{} version 2.x. There may sometimes +be headers +left behind from the previous installation, but those are generally +harmless. If you want to avoid leaving headers behind you can do +things in the following order. 
+ +You must first build the library (@samp{make}), optionally check it +(@samp{make check}), switch the include directories and then install +(@samp{make install}). The steps must be done in this order. Not moving +the directory before install will result in an unusable mixture of header +files from both libraries, but configuring, building, and checking the +library requires the ability to compile and run programs against the old +library. The new @file{/usr/include}, after switching the include +directories and before installing the library should contain the Linux +headers, but nothing else. If you do this, you will need to restore +any headers from libraries other than @theglibc{} yourself after installing the +library. + +You can install @theglibc{} somewhere other than where you configured +it to go by setting the @code{DESTDIR} GNU standard make variable on +the command line for @samp{make install}. The value of this variable +is prepended to all the paths for installation. This is useful when +setting up a chroot environment or preparing a binary distribution. +The directory should be specified with an absolute file name. Installing +with the @code{prefix} and @code{exec_prefix} GNU standard make variables +set is not supported. + +@Theglibc{} includes a daemon called @code{nscd}, which you +may or may not want to run. @code{nscd} caches name service lookups; it +can dramatically improve performance with NIS+, and may help with DNS as +well. + +One auxiliary program, @file{/usr/libexec/pt_chown}, is installed setuid +@code{root} if the @samp{--enable-pt_chown} configuration option is used. +This program is invoked by the @code{grantpt} function; it sets the +permissions on a pseudoterminal so it can be used by the calling process. +If you are using a Linux kernel with the @code{devpts} filesystem enabled +and mounted at @file{/dev/pts}, you don't need this program. 
+ +After installation you might want to configure the timezone and locale +installation of your system. @Theglibc{} comes with a locale +database which gets configured with @code{localedef}. For example, to +set up a German locale with name @code{de_DE}, simply issue the command +@samp{localedef -i de_DE -f ISO-8859-1 de_DE}. To configure all locales +that are supported by @theglibc{}, you can issue from your build directory the +command @samp{make localedata/install-locales}. + +To configure the locally used timezone, set the @code{TZ} environment +variable. The script @code{tzselect} helps you to select the right value. +As an example, for Germany, @code{tzselect} would tell you to use +@samp{TZ='Europe/Berlin'}. For a system wide installation (the given +paths are for an installation with @samp{--prefix=/usr}), link the +timezone file which is in @file{/usr/share/zoneinfo} to the file +@file{/etc/localtime}. For Germany, you might execute @samp{ln -s +/usr/share/zoneinfo/Europe/Berlin /etc/localtime}. + +@node Tools for Compilation +@appendixsec Recommended Tools for Compilation +@cindex installation tools +@cindex tools, for installing library + +We recommend installing the following GNU tools before attempting to +build @theglibc{}: + +@itemize @bullet +@item +GNU @code{make} 3.79 or newer + +You need the latest version of GNU @code{make}. Modifying @theglibc{} +to work with other @code{make} programs would be so difficult that +we recommend you port GNU @code{make} instead. @strong{Really.} We +recommend GNU @code{make} version 3.79. All earlier versions have severe +bugs or lack features. + +@item +GCC 4.7 or newer + +GCC 4.7 or higher is required. In general it is recommended to use +the newest version of the compiler that is known to work for building +@theglibc{}, as newer compilers usually produce better code. As of +release time, GCC 6.3 is the newest compiler verified to work to build +@theglibc{}. 
+ +For multi-arch support it is recommended to use a GCC which has been built with +support for GNU indirect functions. This ensures that correct debugging +information is generated for functions selected by IFUNC resolvers. This +support can either be enabled by configuring GCC with +@samp{--enable-gnu-indirect-function}, or by enabling it by default by setting +@samp{default_gnu_indirect_function} variable for a particular architecture in +the GCC source file @file{gcc/config.gcc}. + +You can use whatever compiler you like to compile programs that use +@theglibc{}. + +Check the FAQ for any special compiler issues on particular platforms. + +@item +GNU @code{binutils} 2.22 or later + +You must use GNU @code{binutils} (as and ld) to build @theglibc{}. +No other assembler or linker has the necessary functionality at the +moment. As of release time, GNU @code{binutils} 2.25 is the newest +verified to work to build @theglibc{}. + +@item +GNU @code{texinfo} 4.7 or later + +To correctly translate and install the Texinfo documentation you need +this version of the @code{texinfo} package. Earlier versions do not +understand all the tags used in the document, and the installation +mechanism for the info files is not present or works differently. +As of release time, @code{texinfo} 6.0 is the newest verified to work +to build @theglibc{}. + +@item +GNU @code{awk} 3.1.2, or higher + +@code{awk} is used in several places to generate files. +Some @code{gawk} extensions are used, including the @code{asorti} +function, which was introduced in version 3.1.2 of @code{gawk}. +As of release time, @code{gawk} version 4.1.3 is the newest verified +to work to build @theglibc{}. + +@item +Perl 5 + +Perl is not required, but it is used if present to test the +installation. We may decide to use it elsewhere in the future. + +@item +GNU @code{sed} 3.02 or newer + +@code{Sed} is used in several places to generate files. Most scripts work +with any version of @code{sed}. 
As of release time, @code{sed} version +4.2.2 is the newest verified to work to build @theglibc{}. + +@end itemize + +@noindent +If you change any of the @file{configure.ac} files you will also need + +@itemize @bullet +@item +GNU @code{autoconf} 2.69 (exactly) +@end itemize + +@noindent +and if you change any of the message translation files you will need + +@itemize @bullet +@item +GNU @code{gettext} 0.10.36 or later +@end itemize + +@noindent +If you wish to regenerate the @code{yacc} parser code in the @file{intl} +subdirectory you will need + +@itemize @bullet +@item +GNU @code{bison} 2.7 or later +@end itemize + +@noindent +You may also need these packages if you upgrade your source tree using +patches, although we try to avoid this. + +@node Linux +@appendixsec Specific advice for @gnulinuxsystems{} +@cindex kernel header files + +If you are installing @theglibc{} on @gnulinuxsystems{}, you need to have +the header files from a 3.2 or newer kernel around for reference. +(For the ia64 architecture, you need version 3.2.18 or newer because this +is the first version with support for the @code{accept4} system call.) +These headers must be installed using @samp{make headers_install}; the +headers present in the kernel source directory are not suitable for +direct use by @theglibc{}. You do not need to use that kernel, just have +its headers installed where @theglibc{} can access them, referred to here as +@var{install-directory}. The easiest way to do this is to unpack it +in a directory such as @file{/usr/src/linux-@var{version}}. In that +directory, run @samp{make headers_install +INSTALL_HDR_PATH=@var{install-directory}}. Finally, configure @theglibc{} +with the option @samp{--with-headers=@var{install-directory}/include}. +Use the most recent kernel you can get your hands on. 
(If you are +cross-compiling @theglibc{}, you need to specify +@samp{ARCH=@var{architecture}} in the @samp{make headers_install} +command, where @var{architecture} is the architecture name used by the +Linux kernel, such as @samp{x86} or @samp{powerpc}.) + +After installing @theglibc{}, you may need to remove or rename +directories such as @file{/usr/include/linux} and +@file{/usr/include/asm}, and replace them with copies of directories +such as @file{linux} and @file{asm} from +@file{@var{install-directory}/include}. All directories present in +@file{@var{install-directory}/include} should be copied, except that +@theglibc{} provides its own version of @file{/usr/include/scsi}; the +files provided by the kernel should be copied without replacing those +provided by @theglibc{}. The @file{linux}, @file{asm} and +@file{asm-generic} directories are required to compile programs using +@theglibc{}; the other directories describe interfaces to the kernel but +are not required if not compiling programs using those interfaces. +You do not need to copy kernel headers if you did not specify an +alternate kernel header source using @samp{--with-headers}. + +The Filesystem Hierarchy Standard for @gnulinuxsystems{} expects some +components of the @glibcadj{} installation to be in +@file{/lib} and some in @file{/usr/lib}. This is handled automatically +if you configure @theglibc{} with @samp{--prefix=/usr}. If you set some other +prefix or allow it to default to @file{/usr/local}, then all the +components are installed there. + +@node Reporting Bugs +@appendixsec Reporting Bugs +@cindex reporting bugs +@cindex bugs, reporting + +There are probably bugs in @theglibc{}. There are certainly +errors and omissions in this manual. If you report them, they will get +fixed. If you don't, no one will ever know about them and they will +remain unfixed for all eternity, if not longer. + +It is a good idea to verify that the problem has not already been +reported. 
Bugs are documented in two places: The file @file{BUGS} +describes a number of well known bugs and the central @glibcadj{} +bug tracking system has a +WWW interface at +@url{http://sourceware.org/bugzilla/}. The WWW +interface gives you access to open and closed reports. A closed report +normally includes a patch or a hint on solving the problem. + +To report a bug, first you must find it. With any luck, this will be the +hard part. Once you've found a bug, make sure it's really a bug. A +good way to do this is to see if @theglibc{} behaves the same way +some other C library does. If so, probably you are wrong and the +libraries are right (but not necessarily). If not, one of the libraries +is probably wrong. It might not be @theglibc{}. Many historical +Unix C libraries permit things that we don't, such as closing a file +twice. + +If you think you have found some way in which @theglibc{} does not +conform to the ISO and POSIX standards (@pxref{Standards and +Portability}), that is definitely a bug. Report it! + +Once you're sure you've found a bug, try to narrow it down to the +smallest test case that reproduces the problem. In the case of a C +library, you really only need to narrow it down to one library +function call, if possible. This should not be too difficult. + +The final step when you have a simple test case is to report the bug. +Do this at @value{REPORT_BUGS_TO}. + +If you are not sure how a function should behave, and this manual +doesn't tell you, that's a bug in the manual. Report that too! If the +function's behavior disagrees with the manual, then either the library +or the manual has a bug, so report the disagreement. If you find any +errors or omissions in this manual, please report them to the +bug database. If you refer to specific +sections of the manual, please include the section names for easier +identification. 
diff --git a/REORG.TODO/manual/intro.texi b/REORG.TODO/manual/intro.texi new file mode 100644 index 0000000000..cc9c99f543 --- /dev/null +++ b/REORG.TODO/manual/intro.texi @@ -0,0 +1,1506 @@ +@node Introduction, Error Reporting, Top, Top +@chapter Introduction +@c %MENU% Purpose of the GNU C Library + +The C language provides no built-in facilities for performing such +common operations as input/output, memory management, string +manipulation, and the like. Instead, these facilities are defined +in a standard @dfn{library}, which you compile and link with your +programs. +@cindex library + +@Theglibc{}, described in this document, defines all of the +library functions that are specified by the @w{ISO C} standard, as well as +additional features specific to POSIX and other derivatives of the Unix +operating system, and extensions specific to @gnusystems{}. + +The purpose of this manual is to tell you how to use the facilities +of @theglibc{}. We have mentioned which features belong to which +standards to help you identify things that are potentially non-portable +to other systems. But the emphasis in this manual is not on strict +portability. + +@menu +* Getting Started:: What this manual is for and how to use it. +* Standards and Portability:: Standards and sources upon which the GNU + C library is based. +* Using the Library:: Some practical uses for the library. +* Roadmap to the Manual:: Overview of the remaining chapters in + this manual. +@end menu + +@node Getting Started, Standards and Portability, , Introduction +@section Getting Started + +This manual is written with the assumption that you are at least +somewhat familiar with the C programming language and basic programming +concepts. Specifically, familiarity with ISO standard C +(@pxref{ISO C}), rather than ``traditional'' pre-ISO C dialects, is +assumed. 
+ +@Theglibc{} includes several @dfn{header files}, each of which +provides definitions and declarations for a group of related facilities; +this information is used by the C compiler when processing your program. +For example, the header file @file{stdio.h} declares facilities for +performing input and output, and the header file @file{string.h} +declares string processing utilities. The organization of this manual +generally follows the same division as the header files. + +If you are reading this manual for the first time, you should read all +of the introductory material and skim the remaining chapters. There are +a @emph{lot} of functions in @theglibc{} and it's not realistic to +expect that you will be able to remember exactly @emph{how} to use each +and every one of them. It's more important to become generally familiar +with the kinds of facilities that the library provides, so that when you +are writing your programs you can recognize @emph{when} to make use of +library functions, and @emph{where} in this manual you can find more +specific information about them. + + +@node Standards and Portability, Using the Library, Getting Started, Introduction +@section Standards and Portability +@cindex standards + +This section discusses the various standards and other sources that @theglibc{} +is based upon. These sources include the @w{ISO C} and +POSIX standards, and the System V and Berkeley Unix implementations. + +The primary focus of this manual is to tell you how to make effective +use of the @glibcadj{} facilities. But if you are concerned about +making your programs compatible with these standards, or portable to +operating systems other than GNU, this can affect how you use the +library. This section gives you an overview of these standards, so that +you will know what they are when they are mentioned in other parts of +the manual. + +@xref{Library Summary}, for an alphabetical list of the functions and +other symbols provided by the library. 
This list also states which +standards each function or symbol comes from. + +@menu +* ISO C:: The international standard for the C + programming language. +* POSIX:: The ISO/IEC 9945 (aka IEEE 1003) standards + for operating systems. +* Berkeley Unix:: BSD and SunOS. +* SVID:: The System V Interface Description. +* XPG:: The X/Open Portability Guide. +@end menu + +@node ISO C, POSIX, , Standards and Portability +@subsection ISO C +@cindex ISO C + +@Theglibc{} is compatible with the C standard adopted by the +American National Standards Institute (ANSI): +@cite{American National Standard X3.159-1989---``ANSI C''} and later +by the International Organization for Standardization (ISO): +@cite{ISO/IEC 9899:1990, ``Programming languages---C''}. +We here refer to the standard as @w{ISO C} since this is the more +general standard in respect of ratification. +The header files and library facilities that make up @theglibc{} are +a superset of those specified by the @w{ISO C} standard.@refill + +@pindex gcc +If you are concerned about strict adherence to the @w{ISO C} standard, you +should use the @samp{-ansi} option when you compile your programs with +the GNU C compiler. This tells the compiler to define @emph{only} ISO +standard features from the library header files, unless you explicitly +ask for additional features. @xref{Feature Test Macros}, for +information on how to do this. + +Being able to restrict the library to include only @w{ISO C} features is +important because @w{ISO C} puts limitations on what names can be defined +by the library implementation, and the GNU extensions don't fit these +limitations. @xref{Reserved Names}, for more information about these +restrictions. + +This manual does not attempt to give you complete details on the +differences between @w{ISO C} and older dialects. It gives advice on how +to write programs to work portably under multiple C dialects, but does +not aim for completeness.
+ + +@node POSIX, Berkeley Unix, ISO C, Standards and Portability +@subsection POSIX (The Portable Operating System Interface) +@cindex POSIX +@cindex POSIX.1 +@cindex IEEE Std 1003.1 +@cindex ISO/IEC 9945-1 +@cindex POSIX.2 +@cindex IEEE Std 1003.2 +@cindex ISO/IEC 9945-2 + +@Theglibc{} is also compatible with the ISO @dfn{POSIX} family of +standards, known more formally as the @dfn{Portable Operating System +Interface for Computer Environments} (ISO/IEC 9945). They were also +published as ANSI/IEEE Std 1003. POSIX is derived mostly from various +versions of the Unix operating system. + +The library facilities specified by the POSIX standards are a superset +of those required by @w{ISO C}; POSIX specifies additional features for +@w{ISO C} functions, as well as specifying new additional functions. In +general, the additional requirements and functionality defined by the +POSIX standards are aimed at providing lower-level support for a +particular kind of operating system environment, rather than general +programming language support which can run in many diverse operating +system environments.@refill + +@Theglibc{} implements all of the functions specified in +@cite{ISO/IEC 9945-1:1996, the POSIX System Application Program +Interface}, commonly referred to as POSIX.1. The primary extensions to +the @w{ISO C} facilities specified by this standard include file system +interface primitives (@pxref{File System Interface}), device-specific +terminal control functions (@pxref{Low-Level Terminal Interface}), and +process control functions (@pxref{Processes}). + +Some facilities from @cite{ISO/IEC 9945-2:1993, the POSIX Shell and +Utilities standard} (POSIX.2) are also implemented in @theglibc{}. +These include utilities for dealing with regular expressions and other +pattern matching facilities (@pxref{Pattern Matching}). + +@menu +* POSIX Safety Concepts:: Safety concepts from POSIX. +* Unsafe Features:: Features that make functions unsafe. 
+* Conditionally Safe Features:: Features that make functions unsafe + in the absence of workarounds. +* Other Safety Remarks:: Additional safety features and remarks. +@end menu + +@comment Roland sez: +@comment The GNU C library as it stands conforms to 1003.2 draft 11, which +@comment specifies: +@comment +@comment Several new macros in <limits.h>. +@comment popen, pclose +@comment <regex.h> (which is not yet fully implemented--wait on this) +@comment fnmatch +@comment getopt +@comment <glob.h> +@comment <wordexp.h> (not yet implemented) +@comment confstr + +@node POSIX Safety Concepts, Unsafe Features, , POSIX +@subsubsection POSIX Safety Concepts +@cindex POSIX Safety Concepts + +This manual documents various safety properties of @glibcadj{} +functions, in lines that follow their prototypes and look like: + +@sampsafety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The properties are assessed according to the criteria set forth in the +POSIX standard for such safety contexts as Thread-, Async-Signal- and +Async-Cancel-Safety. Intuitive definitions of these properties, +attempting to capture the meaning of the standard definitions, follow. + +@itemize @bullet + +@item +@cindex MT-Safe +@cindex Thread-Safe +@code{MT-Safe} or Thread-Safe functions are safe to call in the presence +of other threads. MT, in MT-Safe, stands for Multi Thread. + +Being MT-Safe does not imply a function is atomic, nor that it uses any +of the memory synchronization mechanisms POSIX exposes to users. It is +even possible that calling MT-Safe functions in sequence does not yield +an MT-Safe combination. For example, having a thread call two MT-Safe +functions one right after the other does not guarantee behavior +equivalent to atomic execution of a combination of both functions, since +concurrent calls in other threads may interfere in a destructive way.
+ +Whole-program optimizations that could inline functions across library +interfaces may expose unsafe reordering, and so performing inlining +across the @glibcadj{} interface is not recommended. The documented +MT-Safety status is not guaranteed under whole-program optimization. +However, functions defined in user-visible headers are designed to be +safe for inlining. + + +@item +@cindex AS-Safe +@cindex Async-Signal-Safe +@code{AS-Safe} or Async-Signal-Safe functions are safe to call from +asynchronous signal handlers. AS, in AS-Safe, stands for Asynchronous +Signal. + +Many functions that are AS-Safe may set @code{errno}, or modify the +floating-point environment, because their doing so does not make them +unsuitable for use in signal handlers. However, programs could +misbehave should asynchronous signal handlers modify this thread-local +state, and the signal handling machinery cannot be counted on to +preserve it. Therefore, signal handlers that call functions that may +set @code{errno} or modify the floating-point environment @emph{must} +save their original values, and restore them before returning. + + +@item +@cindex AC-Safe +@cindex Async-Cancel-Safe +@code{AC-Safe} or Async-Cancel-Safe functions are safe to call when +asynchronous cancellation is enabled. AC in AC-Safe stands for +Asynchronous Cancellation. + +The POSIX standard defines only three functions to be AC-Safe, namely +@code{pthread_cancel}, @code{pthread_setcancelstate}, and +@code{pthread_setcanceltype}. At present @theglibc{} provides no +guarantees beyond these three functions, but does document which +functions are presently AC-Safe. This documentation is provided for use +by @theglibc{} developers. + +Just like signal handlers, cancellation cleanup routines must configure +the floating point environment they require. The routines cannot assume +a floating point environment, particularly when asynchronous +cancellation is enabled. 
If the configuration of the floating point +environment cannot be performed atomically then it is also possible that +the environment encountered is internally inconsistent. + + +@item +@cindex MT-Unsafe +@cindex Thread-Unsafe +@cindex AS-Unsafe +@cindex Async-Signal-Unsafe +@cindex AC-Unsafe +@cindex Async-Cancel-Unsafe +@code{MT-Unsafe}, @code{AS-Unsafe}, @code{AC-Unsafe} functions are not +safe to call within the safety contexts described above. Calling them +within such contexts invokes undefined behavior. + +Functions not explicitly documented as safe in a safety context should +be regarded as Unsafe. + + +@item +@cindex Preliminary +@code{Preliminary} safety properties are documented, indicating these +properties may @emph{not} be counted on in future releases of +@theglibc{}. + +Such preliminary properties are the result of an assessment of the +properties of our current implementation, rather than of what is +mandated and permitted by current and future standards. + +Although we strive to abide by the standards, in some cases our +implementation is safe even when the standard does not demand safety, +and in other cases our implementation does not meet the standard safety +requirements. The latter are most likely bugs; the former, when marked +as @code{Preliminary}, should not be counted on: future standards may +require changes that are not compatible with the additional safety +properties afforded by the current implementation. + +Furthermore, the POSIX standard does not offer a detailed definition of +safety. We assume that, by ``safe to call'', POSIX means that, as long +as the program does not invoke undefined behavior, the ``safe to call'' +function behaves as specified, and does not cause other functions to +deviate from their specified behavior. We have chosen to use its loose +definitions of safety, not because they are the best definitions to use, +but because choosing them harmonizes this manual with POSIX. 
+ +Please keep in mind that these are preliminary definitions and +annotations, and certain aspects of the definitions are still under +discussion and might be subject to clarification or change. + +Over time, we envision evolving the preliminary safety notes into stable +commitments, as stable as those of our interfaces. As we do, we will +remove the @code{Preliminary} keyword from safety notes. As long as the +keyword remains, however, they are not to be regarded as a promise of +future behavior. + + +@end itemize + +Other keywords that appear in safety notes are defined in subsequent +sections. + + +@node Unsafe Features, Conditionally Safe Features, POSIX Safety Concepts, POSIX +@subsubsection Unsafe Features +@cindex Unsafe Features + +Functions that are unsafe to call in certain contexts are annotated with +keywords that document their features that make them unsafe to call. +AS-Unsafe features in this section indicate the functions are never safe +to call when asynchronous signals are enabled. AC-Unsafe features +indicate they are never safe to call when asynchronous cancellation is +enabled. There are no MT-Unsafe marks in this section. + +@itemize @bullet + +@item @code{lock} +@cindex lock + +Functions marked with @code{lock} as an AS-Unsafe feature may be +interrupted by a signal while holding a non-recursive lock. If the +signal handler calls another such function that takes the same lock, the +result is a deadlock. + +Functions annotated with @code{lock} as an AC-Unsafe feature may, if +cancelled asynchronously, fail to release a lock that would have been +released if their execution had not been interrupted by asynchronous +thread cancellation. Once a lock is left taken, attempts to take that +lock will block indefinitely. + + +@item @code{corrupt} +@cindex corrupt + +Functions marked with @code{corrupt} as an AS-Unsafe feature may corrupt +data structures and misbehave when they interrupt, or are interrupted +by, another such function. 
Unlike functions marked with @code{lock}, +these take recursive locks to avoid MT-Safety problems, but this is not +enough to stop a signal handler from observing a partially-updated data +structure. Further corruption may arise from the interrupted function's +failure to notice updates made by signal handlers. + +Functions marked with @code{corrupt} as an AC-Unsafe feature may leave +data structures in a corrupt, partially updated state. Subsequent uses +of the data structure may misbehave. + +@c A special case, probably not worth documenting separately, involves +@c reallocing, or even freeing pointers. Any case involving free could +@c be easily turned into an ac-safe leak by resetting the pointer before +@c releasing it; I don't think we have any case that calls for this sort +@c of fixing. Fixing the realloc cases would require a new interface: +@c instead of @code{ptr=realloc(ptr,size)} we'd have to introduce +@c @code{acsafe_realloc(&ptr,size)} that would modify ptr before +@c releasing the old memory. The ac-unsafe realloc could be implemented +@c in terms of an internal interface with this semantics (say +@c __acsafe_realloc), but since realloc can be overridden, the function +@c we call to implement realloc should not be this internal interface, +@c but another internal interface that calls __acsafe_realloc if realloc +@c was not overridden, and calls the overridden realloc with async +@c cancel disabled. --lxoliva + + +@item @code{heap} +@cindex heap + +Functions marked with @code{heap} may call heap memory management +functions from the @code{malloc}/@code{free} family of functions and are +only as safe as those functions. This note is thus equivalent to: + +@sampsafety{@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} + + +@c Check for cases that should have used plugin instead of or in +@c addition to this. Then, after rechecking gettext, adjust i18n if +@c needed. 
+@item @code{dlopen} +@cindex dlopen + +Functions marked with @code{dlopen} use the dynamic loader to load +shared libraries into the current execution image. This involves +opening files, mapping them into memory, allocating additional memory, +resolving symbols, applying relocations and more, all of this while +holding internal dynamic loader locks. + +The locks are enough for these functions to be AS- and AC-Unsafe, but +other issues may arise. At present this is a placeholder for all +potential safety issues raised by @code{dlopen}. + +@c dlopen runs init and fini sections of the module; does this mean +@c dlopen always implies plugin? + + +@item @code{plugin} +@cindex plugin + +Functions annotated with @code{plugin} may run code from plugins that +may be external to @theglibc{}. Such plugin functions are assumed to be +MT-Safe, AS-Unsafe and AC-Unsafe. Examples of such plugins are stack +@cindex NSS +unwinding libraries, name service switch (NSS) and character set +@cindex iconv +conversion (iconv) back-ends. + +Although the plugins mentioned as examples are all brought in by means +of dlopen, the @code{plugin} keyword does not imply any direct +involvement of the dynamic loader or the @code{libdl} interfaces, those +are covered by @code{dlopen}. For example, if one function loads a +module and finds the addresses of some of its functions, while another +just calls those already-resolved functions, the former will be marked +with @code{dlopen}, whereas the latter will get the @code{plugin}. When +a single function takes all of these actions, then it gets both marks. + + +@item @code{i18n} +@cindex i18n + +Functions marked with @code{i18n} may call internationalization +functions of the @code{gettext} family and will be only as safe as those +functions. 
This note is thus equivalent to: + +@sampsafety{@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @ascudlopen{}}@acunsafe{@acucorrupt{}}} + + +@item @code{timer} +@cindex timer + +Functions marked with @code{timer} use the @code{alarm} function or +similar to set a time-out for a system call or a long-running operation. +In a multi-threaded program, there is a risk that the time-out signal +will be delivered to a different thread, thus failing to interrupt the +intended thread. Besides being MT-Unsafe, such functions are always +AS-Unsafe, because calling them in signal handlers may interfere with +timers set in the interrupted code, and AC-Unsafe, because there is no +safe way to guarantee an earlier timer will be reset in case of +asynchronous cancellation. + +@end itemize + + +@node Conditionally Safe Features, Other Safety Remarks, Unsafe Features, POSIX +@subsubsection Conditionally Safe Features +@cindex Conditionally Safe Features + +For some features that make functions unsafe to call in certain +contexts, there are known ways to avoid the safety problem other than +refraining from calling the function altogether. The keywords that +follow refer to such features, and each of their definitions indicate +how the whole program needs to be constrained in order to remove the +safety problem indicated by the keyword. Only when all the reasons that +make a function unsafe are observed and addressed, by applying the +documented constraints, does the function become safe to call in a +context. + +@itemize @bullet + +@item @code{init} +@cindex init + +Functions marked with @code{init} as an MT-Unsafe feature perform +MT-Unsafe initialization when they are first called. + +Calling such a function at least once in single-threaded mode removes +this specific cause for the function to be regarded as MT-Unsafe. If no +other cause for that remains, the function can then be safely called +after other threads are started. 
+ +Functions marked with @code{init} as an AS- or AC-Unsafe feature use the +internal @code{libc_once} machinery or similar to initialize internal +data structures. + +If a signal handler interrupts such an initializer, and calls any +function that also performs @code{libc_once} initialization, it will +deadlock if the thread library has been loaded. + +Furthermore, if an initializer is partially complete before it is +canceled or interrupted by a signal whose handler requires the same +initialization, some or all of the initialization may be performed more +than once, leaking resources or even resulting in corrupt internal data. + +Applications that need to call functions marked with @code{init} as an +AS- or AC-Unsafe feature should ensure the initialization is performed +before configuring signal handlers or enabling cancellation, so that the +AS- and AC-Safety issues related with @code{libc_once} do not arise. + +@c We may have to extend the annotations to cover conditions in which +@c initialization may or may not occur, since an initial call in a safe +@c context is no use if the initialization doesn't take place at that +@c time: it doesn't remove the risk for later calls. + + +@item @code{race} +@cindex race + +Functions annotated with @code{race} as an MT-Safety issue operate on +objects in ways that may cause data races or similar forms of +destructive interference out of concurrent execution. In some cases, +the objects are passed to the functions by users; in others, they are +used by the functions to return values to users; in others, they are not +even exposed to users. + +We consider access to objects passed as (indirect) arguments to +functions to be data race free. The assurance of data race free objects +is the caller's responsibility. We will not mark a function as +MT-Unsafe or AS-Unsafe if it misbehaves when users fail to take the +measures required by POSIX to avoid data races when dealing with such +objects. 
As a general rule, if a function is documented as reading from +an object passed (by reference) to it, or modifying it, users ought to +use memory synchronization primitives to avoid data races just as they +would should they perform the accesses themselves rather than by calling +the library function. @code{FILE} streams are the exception to the +general rule, in that POSIX mandates the library to guard against data +races in many functions that manipulate objects of this specific opaque +type. We regard this as a convenience provided to users, rather than as +a general requirement whose expectations should extend to other types. + +In order to remind users that guarding certain arguments is their +responsibility, we will annotate functions that take objects of certain +types as arguments. We draw the line for objects passed by users as +follows: objects whose types are exposed to users, and that users are +expected to access directly, such as memory buffers, strings, and +various user-visible @code{struct} types, do @emph{not} give reason for +functions to be annotated with @code{race}. It would be noisy and +redundant with the general requirement, and not many would be surprised +by the library's lack of internal guards when accessing objects that can +be accessed directly by users. + +As for objects that are opaque or opaque-like, in that they are to be +manipulated only by passing them to library functions (e.g., +@code{FILE}, @code{DIR}, @code{obstack}, @code{iconv_t}), there might be +additional expectations as to internal coordination of access by the +library. We will annotate, with @code{race} followed by a colon and the +argument name, functions that take such objects but that do not take +care of synchronizing access to them by default. 
For example, +@code{FILE} stream @code{unlocked} functions will be annotated, but +those that perform implicit locking on @code{FILE} streams by default +will not, even though the implicit locking may be disabled on a +per-stream basis. + +In either case, we will not regard as MT-Unsafe functions that may +access user-supplied objects in unsafe ways should users fail to ensure +the accesses are well defined. The notion prevails that users are +expected to safeguard against data races any user-supplied objects that +the library accesses on their behalf. + +@c The above describes @mtsrace; @mtasurace is described below. + +This user responsibility does not apply, however, to objects controlled +by the library itself, such as internal objects and static buffers used +to return values from certain calls. When the library doesn't guard +them against concurrent uses, these cases are regarded as MT-Unsafe and +AS-Unsafe (although the @code{race} mark under AS-Unsafe will be omitted +as redundant with the one under MT-Unsafe). As in the case of +user-exposed objects, the mark may be followed by a colon and an +identifier. The identifier groups all functions that operate on a +certain unguarded object; users may avoid the MT-Safety issues related +with unguarded concurrent access to such internal objects by creating a +non-recursive mutex related with the identifier, and always holding the +mutex when calling any function marked as racy on that identifier, as +they would have to should the identifier be an object under user +control. The non-recursive mutex avoids the MT-Safety issue, but it +trades one AS-Safety issue for another, so use in asynchronous signals +remains undefined. + +When the identifier relates to a static buffer used to hold return +values, the mutex must be held for as long as the buffer remains in use +by the caller. 
Many functions that return pointers to static buffers +offer reentrant variants that store return values in caller-supplied +buffers instead. In some cases, such as @code{tmpnam}, the variant is +chosen not by calling an alternate entry point, but by passing a +non-@code{NULL} pointer to the buffer in which the returned values are +to be stored. These variants are generally preferable in multi-threaded +programs, although some of them are not MT-Safe because of other +internal buffers, also documented with @code{race} notes. + + +@item @code{const} +@cindex const + +Functions marked with @code{const} as an MT-Safety issue non-atomically +modify internal objects that are better regarded as constant, because a +substantial portion of @theglibc{} accesses them without +synchronization. Unlike @code{race}, which causes both readers and +writers of internal objects to be regarded as MT-Unsafe and AS-Unsafe, +this mark is applied to writers only. Writers remain equally MT- and +AS-Unsafe to call, but the then-mandatory constness of objects they +modify enables readers to be regarded as MT-Safe and AS-Safe (as long as +no other reasons for them to be unsafe remain), since the lack of +synchronization is not a problem when the objects are effectively +constant. + +The identifier that follows the @code{const} mark will appear by itself +as a safety note in readers. Programs that wish to work around this +safety issue, so as to call writers, may use a non-recursive +@code{rwlock} associated with the identifier, and guard @emph{all} calls +to functions marked with @code{const} followed by the identifier with a +write lock, and @emph{all} calls to functions marked with the identifier +by itself with a read lock. The non-recursive locking removes the +MT-Safety problem, but it trades one AS-Safety problem for another, so +use in asynchronous signals remains undefined.
+ +@c But what if, instead of marking modifiers with const:id and readers +@c with just id, we marked writers with race:id and readers with ro:id? +@c Instead of having to define each instance of “id”, we'd have a +@c general pattern governing all such “id”s, wherein race:id would +@c suggest the need for an exclusive/write lock to make the function +@c safe, whereas ro:id would indicate “id” is expected to be read-only, +@c but if any modifiers are called (while holding an exclusive lock), +@c then ro:id-marked functions ought to be guarded with a read lock for +@c safe operation. ro:env or ro:locale, for example, seems to convey +@c more clearly the expectations and the meaning, than just env or +@c locale. + + +@item @code{sig} +@cindex sig + +Functions marked with @code{sig} as an MT-Safety issue (which implies an +identical AS-Safety issue, omitted for brevity) may temporarily install +a signal handler for internal purposes, which may interfere with other +uses of the signal, identified after a colon. + +This safety problem can be worked around by ensuring that no other uses +of the signal will take place for the duration of the call. Holding a +non-recursive mutex while calling all functions that use the same +temporary signal, blocking that signal before the call, and resetting its +handler afterwards are recommended. + +There is no safe way to guarantee the original signal handler is +restored in case of asynchronous cancellation, therefore so-marked +functions are also AC-Unsafe. + +@c fixme: at least deferred cancellation should get it right, and would +@c obviate the restoring bit below, and the qualifier above. + +Besides the measures recommended to work around the MT- and AS-Safety +problem, in order to avert the cancellation problem, disabling +asynchronous cancellation @emph{and} installing a cleanup handler to +restore the signal to the desired state and to release the mutex are +recommended.
+ + +@item @code{term} +@cindex term + +Functions marked with @code{term} as an MT-Safety issue may change the +terminal settings in the recommended way, namely: call @code{tcgetattr}, +modify some flags, and then call @code{tcsetattr}; this creates a window +in which changes made by other threads are lost. Thus, functions marked +with @code{term} are MT-Unsafe. The same window enables changes made by +asynchronous signals to be lost. These functions are also AS-Unsafe, +but the corresponding mark is omitted as redundant. + +It is thus advisable for applications using the terminal to avoid +concurrent and reentrant interactions with it, by not using it in signal +handlers or blocking signals that might use it, and holding a lock while +calling these functions and interacting with the terminal. This lock +should also be used for mutual exclusion with functions marked with +@code{@mtasurace{:tcattr(fd)}}, where @var{fd} is a file descriptor for +the controlling terminal. The caller may use a single mutex for +simplicity, or use one mutex per terminal, even if referenced by +different file descriptors. + +Functions marked with @code{term} as an AC-Safety issue are supposed to +restore terminal settings to their original state, after temporarily +changing them, but they may fail to do so if cancelled. + +@c fixme: at least deferred cancellation should get it right, and would +@c obviate the restoring bit below, and the qualifier above. + +Besides the measures recommended to work around the MT- and AS-Safety +problem, in order to avert the cancellation problem, disabling +asynchronous cancellation @emph{and} installing a cleanup handler to +restore the terminal settings to the original state and to release the +mutex are recommended. 
+ + +@end itemize + + +@node Other Safety Remarks, , Conditionally Safe Features, POSIX +@subsubsection Other Safety Remarks +@cindex Other Safety Remarks + +Additional keywords may be attached to functions, indicating features +that do not make a function unsafe to call, but that may need to be +taken into account in certain classes of programs: + +@itemize @bullet + +@item @code{locale} +@cindex locale + +Functions annotated with @code{locale} as an MT-Safety issue read from +the locale object without any form of synchronization. Functions +annotated with @code{locale} called concurrently with locale changes may +behave in ways that do not correspond to any of the locales active +during their execution, but an unpredictable mix thereof. + +We do not mark these functions as MT- or AS-Unsafe, however, because +functions that modify the locale object are marked with +@code{const:locale} and regarded as unsafe. Being unsafe, the latter +are not to be called when multiple threads are running or asynchronous +signals are enabled, and so the locale can be considered effectively +constant in these contexts, which makes the former safe. + +@c Should the locking strategy suggested under @code{const} be used, +@c failure to guard locale uses is not as fatal as data races in +@c general: unguarded uses will @emph{not} follow dangling pointers or +@c access uninitialized, unmapped or recycled memory. Each access will +@c read from a consistent locale object that is or was active at some +@c point during its execution. Without synchronization, however, it +@c cannot even be assumed that, after a change in locale, earlier +@c locales will no longer be used, even after the newly-chosen one is +@c used in the thread. Nevertheless, even though unguarded reads from +@c the locale will not violate type safety, functions that access the +@c locale multiple times may invoke all sorts of undefined behavior +@c because of the unexpected locale changes. 
+ + +@item @code{env} +@cindex env + +Functions marked with @code{env} as an MT-Safety issue access the +environment with @code{getenv} or similar, without any guards to ensure +safety in the presence of concurrent modifications. + +We do not mark these functions as MT- or AS-Unsafe, however, because +functions that modify the environment are all marked with +@code{const:env} and regarded as unsafe. Being unsafe, the latter are +not to be called when multiple threads are running or asynchronous +signals are enabled, and so the environment can be considered +effectively constant in these contexts, which makes the former safe. + + +@item @code{hostid} +@cindex hostid + +The function marked with @code{hostid} as an MT-Safety issue reads from +the system-wide data structures that hold the ``host ID'' of the +machine. These data structures cannot generally be modified atomically. +Since it is expected that the ``host ID'' will not normally change, the +function that reads from it (@code{gethostid}) is regarded as safe, +whereas the function that modifies it (@code{sethostid}) is marked with +@code{@mtasuconst{:@mtshostid{}}}, indicating it may require special +care if it is to be called. In this specific case, the special care +amounts to system-wide (not merely intra-process) coordination. + + +@item @code{sigintr} +@cindex sigintr + +Functions marked with @code{sigintr} as an MT-Safety issue access the +@code{_sigintr} internal data structure without any guards to ensure +safety in the presence of concurrent modifications. + +We do not mark these functions as MT- or AS-Unsafe, however, because +functions that modify this data structure are all marked with +@code{const:sigintr} and regarded as unsafe. Being unsafe, the latter +are not to be called when multiple threads are running or asynchronous +signals are enabled, and so the data structure can be considered +effectively constant in these contexts, which makes the former safe.
+ + +@item @code{fd} +@cindex fd + +Functions annotated with @code{fd} as an AC-Safety issue may leak file +descriptors if asynchronous thread cancellation interrupts their +execution. + +Functions that allocate or deallocate file descriptors will generally be +marked as such. Even if they attempted to protect the file descriptor +allocation and deallocation with cleanup regions, allocating a new +descriptor and storing its number where the cleanup region could release +it cannot be performed as a single atomic operation. Similarly, +releasing the descriptor and taking it out of the data structure +normally responsible for releasing it cannot be performed atomically. +There will always be a window in which the descriptor cannot be released +because it was not stored in the cleanup handler argument yet, or it was +already taken out before releasing it. It cannot be taken out after +release: an open descriptor could mean either that the descriptor still +has to be closed, or that it already did so but the descriptor was +reallocated by another thread or signal handler. + +Such leaks could be internally avoided, with some performance penalty, +by temporarily disabling asynchronous thread cancellation. However, +since callers of allocation or deallocation functions would have to do +this themselves, to avoid the same sort of leak in their own layer, it +makes more sense for the library to assume they are taking care of it +than to impose a performance penalty that is redundant when the problem +is solved in upper layers, and insufficient when it is not. + +This remark by itself does not cause a function to be regarded as +AC-Unsafe. However, cumulative effects of such leaks may pose a +problem for some programs. If this is the case, suspending asynchronous +cancellation for the duration of calls to such functions is recommended. 
+ + +@item @code{mem} +@cindex mem + +Functions annotated with @code{mem} as an AC-Safety issue may leak +memory if asynchronous thread cancellation interrupts their execution. + +The problem is similar to that of file descriptors: there is no atomic +interface to allocate memory and store its address in the argument to a +cleanup handler, or to release it and remove its address from that +argument, without at least temporarily disabling asynchronous +cancellation, which these functions do not do. + +This remark does not by itself cause a function to be regarded as +generally AC-Unsafe. However, cumulative effects of such leaks may be +severe enough for some programs that disabling asynchronous cancellation +for the duration of calls to such functions may be required. + + +@item @code{cwd} +@cindex cwd + +Functions marked with @code{cwd} as an MT-Safety issue may temporarily +change the current working directory during their execution, which may +cause relative pathnames to be resolved in unexpected ways in other +threads or within asynchronous signal or cancellation handlers. + +This is not enough of a reason to mark so-marked functions as MT- or +AS-Unsafe, but when this behavior is optional (e.g., @code{nftw} with +@code{FTW_CHDIR}), avoiding the option may be a good alternative to +using full pathnames or file descriptor-relative (e.g. @code{openat}) +system calls. + + +@item @code{!posix} +@cindex !posix + +This remark, as an MT-, AS- or AC-Safety note to a function, indicates +the safety status of the function is known to differ from the specified +status in the POSIX standard. For example, POSIX does not require a +function to be Safe, but our implementation is, or vice-versa. + +For the time being, the absence of this remark does not imply the safety +properties we documented are identical to those mandated by POSIX for +the corresponding functions. 
+ + +@item @code{:identifier} +@cindex :identifier + +Annotations may sometimes be followed by identifiers, intended to group +several functions that e.g. access the data structures in an unsafe way, +as in @code{race} and @code{const}, or to provide more specific +information, such as naming a signal in a function marked with +@code{sig}. It is envisioned that it may be applied to @code{lock} and +@code{corrupt} as well in the future. + +In most cases, the identifier will name a set of functions, but it may +name global objects or function arguments, or identifiable properties or +logical components associated with them, with a notation such as +e.g. @code{:buf(arg)} to denote a buffer associated with the argument +@var{arg}, or @code{:tcattr(fd)} to denote the terminal attributes of a +file descriptor @var{fd}. + +The most common use for identifiers is to provide logical groups of +functions and arguments that need to be protected by the same +synchronization primitive in order to ensure safe operation in a given +context. + + +@item @code{/condition} +@cindex /condition + +Some safety annotations may be conditional, in that they only apply if a +boolean expression involving arguments, global variables or even the +underlying kernel evaluates to true. Such conditions as +@code{/hurd} or @code{/!linux!bsd} indicate the preceding marker only +applies when the underlying kernel is the HURD, or when it is neither +Linux nor a BSD kernel, respectively. @code{/!ps} and +@code{/one_per_line} indicate the preceding marker only applies when +argument @var{ps} is NULL, or global variable @var{one_per_line} is +nonzero. + +When all marks that render a function unsafe are adorned with such +conditions, and none of the named conditions hold, then the function can +be regarded as safe. 
+ + +@end itemize + + +@node Berkeley Unix, SVID, POSIX, Standards and Portability +@subsection Berkeley Unix +@cindex BSD Unix +@cindex 4.@var{n} BSD Unix +@cindex Berkeley Unix +@cindex SunOS +@cindex Unix, Berkeley + +@Theglibc{} defines facilities from some versions of Unix which +are not formally standardized, specifically from the 4.2 BSD, 4.3 BSD, +and 4.4 BSD Unix systems (also known as @dfn{Berkeley Unix}) and from +@dfn{SunOS} (a popular 4.2 BSD derivative that includes some Unix System +V functionality). These systems support most of the @w{ISO C} and POSIX +facilities, and 4.4 BSD and newer releases of SunOS in fact support them all. + +The BSD facilities include symbolic links (@pxref{Symbolic Links}), the +@code{select} function (@pxref{Waiting for I/O}), the BSD signal +functions (@pxref{BSD Signal Handling}), and sockets (@pxref{Sockets}). + +@node SVID, XPG, Berkeley Unix, Standards and Portability +@subsection SVID (The System V Interface Description) +@cindex SVID +@cindex System V Unix +@cindex Unix, System V + +The @dfn{System V Interface Description} (SVID) is a document describing +the AT&T Unix System V operating system. It is to some extent a +superset of the POSIX standard (@pxref{POSIX}). + +@Theglibc{} defines most of the facilities required by the SVID +that are not also required by the @w{ISO C} or POSIX standards, for +compatibility with System V Unix and other Unix systems (such as +SunOS) which include these facilities. However, many of the more +obscure and less generally useful facilities required by the SVID are +not included. (In fact, Unix System V itself does not provide them all.) + +The supported facilities from System V include the methods for +inter-process communication and shared memory, the @code{hsearch} and +@code{drand48} families of functions, @code{fmtmsg} and several of the +mathematical functions. 
+ +@node XPG, , SVID, Standards and Portability +@subsection XPG (The X/Open Portability Guide) + +The X/Open Portability Guide, published by the X/Open Company, Ltd., is +a more general standard than POSIX. X/Open owns the Unix copyright and +the XPG specifies the requirements for systems which are intended to be +a Unix system. + +@Theglibc{} complies with the X/Open Portability Guide, Issue 4.2, +with all extensions common to XSI (X/Open System Interface) +compliant systems and also all X/Open UNIX extensions. + +The additions on top of POSIX are mainly derived from functionality +available in @w{System V} and BSD systems. Some of the really bad +mistakes in @w{System V} systems were corrected, though. Since +fulfilling the XPG standard with the Unix extensions is a +precondition for getting the Unix brand, chances are good that the +functionality is available on commercial systems. + + +@node Using the Library, Roadmap to the Manual, Standards and Portability, Introduction +@section Using the Library + +This section describes some of the practical issues involved in using +@theglibc{}. + +@menu +* Header Files:: How to include the header files in your + programs. +* Macro Definitions:: Some functions in the library may really + be implemented as macros. +* Reserved Names:: The C standard reserves some names for + the library, and some for users. +* Feature Test Macros:: How to control what names are defined. +@end menu + +@node Header Files, Macro Definitions, , Using the Library +@subsection Header Files +@cindex header files + +Libraries for use by C programs really consist of two parts: @dfn{header +files} that define types and macros and declare variables and +functions; and the actual library or @dfn{archive} that contains the +definitions of the variables and functions. + +(Recall that in C, a @dfn{declaration} merely provides information that +a function or variable exists and gives its type. 
For a function +declaration, information about the types of its arguments might be +provided as well. The purpose of declarations is to allow the compiler +to correctly process references to the declared variables and functions. +A @dfn{definition}, on the other hand, actually allocates storage for a +variable or says what a function does.) +@cindex definition (compared to declaration) +@cindex declaration (compared to definition) + +In order to use the facilities in @theglibc{}, you should be sure +that your program source files include the appropriate header files. +This is so that the compiler has declarations of these facilities +available and can correctly process references to them. Once your +program has been compiled, the linker resolves these references to +the actual definitions provided in the archive file. + +Header files are included into a program source file by the +@samp{#include} preprocessor directive. The C language supports two +forms of this directive; the first, + +@smallexample +#include "@var{header}" +@end smallexample + +@noindent +is typically used to include a header file @var{header} that you write +yourself; this would contain definitions and declarations describing the +interfaces between the different parts of your particular application. +By contrast, + +@smallexample +#include <file.h> +@end smallexample + +@noindent +is typically used to include a header file @file{file.h} that contains +definitions and declarations for a standard library. This file would +normally be installed in a standard place by your system administrator. +You should use this second form for the C library header files. + +Typically, @samp{#include} directives are placed at the top of the C +source file, before any other code. 
If you begin your source files with +some comments explaining what the code in the file does (a good idea), +put the @samp{#include} directives immediately afterwards, following the +feature test macro definition (@pxref{Feature Test Macros}). + +For more information about the use of header files and @samp{#include} +directives, @pxref{Header Files,,, cpp.info, The GNU C Preprocessor +Manual}.@refill + +@Theglibc{} provides several header files, each of which contains +the type and macro definitions and variable and function declarations +for a group of related facilities. This means that your programs may +need to include several header files, depending on exactly which +facilities you are using. + +Some library header files include other library header files +automatically. However, as a matter of programming style, you should +not rely on this; it is better to explicitly include all the header +files required for the library facilities you are using. The @glibcadj{} +header files have been written in such a way that it doesn't +matter if a header file is accidentally included more than once; +including a header file a second time has no effect. Likewise, if your +program needs to include multiple header files, the order in which they +are included doesn't matter. + +@strong{Compatibility Note:} Inclusion of standard header files in any +order and any number of times works in any @w{ISO C} implementation. +However, this has traditionally not been the case in many older C +implementations. + +Strictly speaking, you don't @emph{have to} include a header file to use +a function it declares; you could declare the function explicitly +yourself, according to the specifications in this manual. But it is +usually better to include the header file because it may define types +and macros that are not otherwise available and because it may define +more efficient macro replacements for some functions. It is also a sure +way to have the correct declaration. 
+ +@node Macro Definitions, Reserved Names, Header Files, Using the Library +@subsection Macro Definitions of Functions +@cindex shadowing functions with macros +@cindex removing macros that shadow functions +@cindex undefining macros that shadow functions + +If we describe something as a function in this manual, it may have a +macro definition as well. This normally has no effect on how your +program runs---the macro definition does the same thing as the function +would. In particular, macro equivalents for library functions evaluate +arguments exactly once, in the same way that a function call would. The +main reason for these macro definitions is that sometimes they can +produce an inline expansion that is considerably faster than an actual +function call. + +Taking the address of a library function works even if it is also +defined as a macro. This is because, in this context, the name of the +function isn't followed by the left parenthesis that is syntactically +necessary to recognize a macro call. + +You might occasionally want to avoid using the macro definition of a +function---perhaps to make your program easier to debug. There are +two ways you can do this: + +@itemize @bullet +@item +You can avoid a macro definition in a specific use by enclosing the name +of the function in parentheses. This works because the name of the +function doesn't appear in a syntactic context where it is recognizable +as a macro call. + +@item +You can suppress any macro definition for a whole source file by using +the @samp{#undef} preprocessor directive, unless otherwise stated +explicitly in the description of that facility. +@end itemize + +For example, suppose the header file @file{stdlib.h} declares a function +named @code{abs} with + +@smallexample +extern int abs (int); +@end smallexample + +@noindent +and also provides a macro definition for @code{abs}. 
Then, in: + +@smallexample +#include <stdlib.h> +int f (int *i) @{ return abs (++*i); @} +@end smallexample + +@noindent +the reference to @code{abs} might refer to either a macro or a function. +On the other hand, in each of the following examples the reference is +to a function and not a macro. + +@smallexample +#include <stdlib.h> +int g (int *i) @{ return (abs) (++*i); @} + +#undef abs +int h (int *i) @{ return abs (++*i); @} +@end smallexample + +Since macro definitions that double for a function behave in +exactly the same way as the actual function version, there is usually no +need for any of these methods. In fact, removing macro definitions usually +just makes your program slower. + + +@node Reserved Names, Feature Test Macros, Macro Definitions, Using the Library +@subsection Reserved Names +@cindex reserved names +@cindex name space + +The names of all library types, macros, variables and functions that +come from the @w{ISO C} standard are reserved unconditionally; your program +@strong{may not} redefine these names. All other library names are +reserved if your program explicitly includes the header file that +defines or declares them. There are several reasons for these +restrictions: + +@itemize @bullet +@item +Other people reading your code could get very confused if you were using +a function named @code{exit} to do something completely different from +what the standard @code{exit} function does, for example. Preventing +this situation helps to make your programs easier to understand and +contributes to modularity and maintainability. + +@item +It avoids the possibility of a user accidentally redefining a library +function that is called by other library functions. If redefinition +were allowed, those other functions would not work properly. + +@item +It allows the compiler to do whatever special optimizations it pleases +on calls to these functions, without the possibility that they may have +been redefined by the user. 
Some library facilities, such as those for +dealing with variadic arguments (@pxref{Variadic Functions}) +and non-local exits (@pxref{Non-Local Exits}), actually require a +considerable amount of cooperation on the part of the C compiler, and +with respect to the implementation, it might be easier for the compiler +to treat these as built-in parts of the language. +@end itemize + +In addition to the names documented in this manual, reserved names +include all external identifiers (global functions and variables) that +begin with an underscore (@samp{_}) and all identifiers regardless of +use that begin with either two underscores or an underscore followed by +a capital letter. This is so that the library and +header files can define functions, variables, and macros for internal +purposes without risk of conflict with names in user programs. + +Some additional classes of identifier names are reserved for future +extensions to the C language or the POSIX.1 environment. While using these +names for your own purposes right now might not cause a problem, they do +raise the possibility of conflict with future versions of the C +or POSIX standards, so you should avoid these names. + +@itemize @bullet +@item +Names beginning with a capital @samp{E} followed by a digit or uppercase +letter may be used for additional error code names. @xref{Error +Reporting}. + +@item +Names that begin with either @samp{is} or @samp{to} followed by a +lowercase letter may be used for additional character testing and +conversion functions. @xref{Character Handling}. + +@item +Names that begin with @samp{LC_} followed by an uppercase letter may be +used for additional macros specifying locale attributes. +@xref{Locales}. + +@item +Names of all existing mathematics functions (@pxref{Mathematics}) +suffixed with @samp{f} or @samp{l} are reserved for corresponding +functions that operate on @code{float} and @code{long double} arguments, +respectively. 
+ +@item +Names that begin with @samp{SIG} followed by an uppercase letter are +reserved for additional signal names. @xref{Standard Signals}. + +@item +Names that begin with @samp{SIG_} followed by an uppercase letter are +reserved for additional signal actions. @xref{Basic Signal Handling}. + +@item +Names beginning with @samp{str}, @samp{mem}, or @samp{wcs} followed by a +lowercase letter are reserved for additional string and array functions. +@xref{String and Array Utilities}. + +@item +Names that end with @samp{_t} are reserved for additional type names. +@end itemize + +In addition, some individual header files reserve names beyond +those that they actually define. You only need to worry about these +restrictions if your program includes that particular header file. + +@itemize @bullet +@item +The header file @file{dirent.h} reserves names prefixed with +@samp{d_}. +@pindex dirent.h + +@item +The header file @file{fcntl.h} reserves names prefixed with +@samp{l_}, @samp{F_}, @samp{O_}, and @samp{S_}. +@pindex fcntl.h + +@item +The header file @file{grp.h} reserves names prefixed with @samp{gr_}. +@pindex grp.h + +@item +The header file @file{limits.h} reserves names suffixed with @samp{_MAX}. +@pindex limits.h + +@item +The header file @file{pwd.h} reserves names prefixed with @samp{pw_}. +@pindex pwd.h + +@item +The header file @file{signal.h} reserves names prefixed with @samp{sa_} +and @samp{SA_}. +@pindex signal.h + +@item +The header file @file{sys/stat.h} reserves names prefixed with @samp{st_} +and @samp{S_}. +@pindex sys/stat.h + +@item +The header file @file{sys/times.h} reserves names prefixed with @samp{tms_}. +@pindex sys/times.h + +@item +The header file @file{termios.h} reserves names prefixed with @samp{c_}, +@samp{V}, @samp{I}, @samp{O}, and @samp{TC}; and names prefixed with +@samp{B} followed by a digit. +@pindex termios.h +@end itemize + +@comment Include the section on Creature Nest Macros. 
+@include creature.texi + +@node Roadmap to the Manual, , Using the Library, Introduction +@section Roadmap to the Manual + +Here is an overview of the contents of the remaining chapters of +this manual. + +@c The chapter overview ordering is: +@c Error Reporting (2) +@c Virtual Memory Allocation and Paging (3) +@c Character Handling (4) +@c String and Array Utilities (5) +@c Character Set Handling (6) +@c Locales and Internationalization (7) +@c Searching and Sorting (9) +@c Pattern Matching (10) +@c Input/Output Overview (11) +@c Input/Output on Streams (12) +@c Low-level Input/Output (13) +@c File System Interface (14) +@c Pipes and FIFOs (15) +@c Sockets (16) +@c Low-Level Terminal Interface (17) +@c Syslog (18) +@c Mathematics (19) +@c Arithmetic Functions (20) +@c Date and Time (21) +@c Non-Local Exits (23) +@c Signal Handling (24) +@c The Basic Program/System Interface (25) +@c Processes (26) +@c Job Control (28) +@c System Databases and Name Service Switch (29) +@c Users and Groups (30) -- References `User Database' and `Group Database' +@c System Management (31) +@c System Configuration Parameters (32) +@c C Language Facilities in the Library (AA) +@c Summary of Library Facilities (AB) +@c Installing (AC) +@c Library Maintenance (AD) + +@c The following chapters need overview text to be added: +@c Message Translation (8) +@c Resource Usage And Limitations (22) +@c Inter-Process Communication (27) +@c DES Encryption and Password Handling (33) +@c Debugging support (34) +@c POSIX Threads (35) +@c Internal Probes (36) +@c Platform-specific facilities (AE) +@c Contributors to (AF) +@c Free Software Needs Free Documentation (AG) +@c GNU Lesser General Public License (AH) +@c GNU Free Documentation License (AI) + +@itemize @bullet +@item +@ref{Error Reporting}, describes how errors detected by the library +are reported. 
+ + +@item +@ref{Memory}, describes @theglibc{}'s facilities for managing and +using virtual and real memory, including dynamic allocation of virtual +memory. If you do not know in advance how much memory your program +needs, you can allocate it dynamically instead, and manipulate it via +pointers. + +@item +@ref{Character Handling}, contains information about character +classification functions (such as @code{isspace}) and functions for +performing case conversion. + +@item +@ref{String and Array Utilities}, has descriptions of functions for +manipulating strings (null-terminated character arrays) and general +byte arrays, including operations such as copying and comparison. + +@item +@ref{Character Set Handling}, contains information about manipulating +characters and strings using character sets larger than will fit in +the usual @code{char} data type. + +@item +@ref{Locales}, describes how selecting a particular country +or language affects the behavior of the library. For example, the locale +affects collation sequences for strings and how monetary values are +formatted. + +@item +@ref{Searching and Sorting}, contains information about functions +for searching and sorting arrays. You can use these functions on any +kind of array by providing an appropriate comparison function. + +@item +@ref{Pattern Matching}, presents functions for matching regular expressions +and shell file name patterns, and for expanding words as the shell does. + +@item +@ref{I/O Overview}, gives an overall look at the input and output +facilities in the library, and contains information about basic concepts +such as file names. + +@item +@ref{I/O on Streams}, describes I/O operations involving streams (or +@w{@code{FILE *}} objects). These are the normal C library functions +from @file{stdio.h}. + +@item +@ref{Low-Level I/O}, contains information about I/O operations +on file descriptors. File descriptors are a lower-level mechanism +specific to the Unix family of operating systems. 
+ +@item +@ref{File System Interface}, has descriptions of operations on entire +files, such as functions for deleting and renaming them and for creating +new directories. This chapter also contains information about how you +can access the attributes of a file, such as its owner and file protection +modes. + +@item +@ref{Pipes and FIFOs}, contains information about simple interprocess +communication mechanisms. Pipes allow communication between two related +processes (such as between a parent and child), while FIFOs allow +communication between processes sharing a common file system on the same +machine. + +@item +@ref{Sockets}, describes a more complicated interprocess communication +mechanism that allows processes running on different machines to +communicate over a network. This chapter also contains information about +Internet host addressing and how to use the system network databases. + +@item +@ref{Low-Level Terminal Interface}, describes how you can change the +attributes of a terminal device. If you want to disable echo of +characters typed by the user, for example, read this chapter. + +@item +@ref{Mathematics}, contains information about the math library +functions. These include things like random-number generators and +remainder functions on integers as well as the usual trigonometric and +exponential functions on floating-point numbers. + +@item +@ref{Arithmetic,, Low-Level Arithmetic Functions}, describes functions +for simple arithmetic, analysis of floating-point values, and reading +numbers from strings. + +@item +@ref{Date and Time}, describes functions for measuring both calendar time +and CPU time, as well as functions for setting alarms and timers. + +@item +@ref{Non-Local Exits}, contains descriptions of the @code{setjmp} and +@code{longjmp} functions. These functions provide a facility for +@code{goto}-like jumps which can jump from one function to another. 
+ +@item +@ref{Signal Handling}, tells you all about signals---what they are, +how to establish a handler that is called when a particular kind of +signal is delivered, and how to prevent signals from arriving during +critical sections of your program. + +@item +@ref{Program Basics}, tells how your programs can access their +command-line arguments and environment variables. + +@item +@ref{Processes}, contains information about how to start new processes +and run programs. + +@item +@ref{Job Control}, describes functions for manipulating process groups +and the controlling terminal. This material is probably only of +interest if you are writing a shell or other program which handles job +control specially. + +@item +@ref{Name Service Switch}, describes the services which are available +for looking up names in the system databases, how to determine which +service is used for which database, and how these services are +implemented so that contributors can design their own services. + +@item +@ref{User Database}, and @ref{Group Database}, tell you how to access +the system user and group databases. + +@item +@ref{System Management}, describes functions for controlling and getting +information about the hardware and software configuration your program +is executing under. + +@item +@ref{System Configuration}, tells you how you can get information about +various operating system limits. Most of these parameters are provided for +compatibility with POSIX. + +@item +@ref{Language Features}, contains information about library support for +standard parts of the C language, including things like the @code{sizeof} +operator and the symbolic constant @code{NULL}, how to write functions +accepting variable numbers of arguments, and constants describing the +ranges and other properties of the numerical types. There is also a simple +debugging mechanism which allows you to put assertions in your code, and +have diagnostic messages printed if the tests fail. 
+ +@item +@ref{Library Summary}, gives a summary of all the functions, variables, and +macros in the library, with complete data types and function prototypes, +and says what standard or system each is derived from. + +@item +@ref{Installation}, explains how to build and install @theglibc{} on +your system, and how to report any bugs you might find. + +@item +@ref{Maintenance}, explains how to add new functions or port the +library to a new system. +@end itemize + +If you already know the name of the facility you are interested in, you +can look it up in @ref{Library Summary}. This gives you a summary of +its syntax and a pointer to where you can find a more detailed +description. This appendix is particularly useful if you just want to +verify the order and type of arguments to a function, for example. It +also tells you what standard or system each function, variable, or macro +is derived from. diff --git a/REORG.TODO/manual/io.texi b/REORG.TODO/manual/io.texi new file mode 100644 index 0000000000..bd82f76ee8 --- /dev/null +++ b/REORG.TODO/manual/io.texi @@ -0,0 +1,395 @@ +@node I/O Overview, I/O on Streams, Pattern Matching, Top +@c %MENU% Introduction to the I/O facilities +@chapter Input/Output Overview + +Most programs need to do either input (reading data) or output (writing +data), or most frequently both, in order to do anything useful. @Theglibc{} +provides such a large selection of input and output functions +that the hardest part is often deciding which function is most +appropriate! + +This chapter introduces concepts and terminology relating to input +and output. Other chapters relating to the GNU I/O facilities are: + +@itemize @bullet +@item +@ref{I/O on Streams}, which covers the high-level functions +that operate on streams, including formatted input and output. + +@item +@ref{Low-Level I/O}, which covers the basic I/O and control +functions on file descriptors. 
+ +@item +@ref{File System Interface}, which covers functions for operating on +directories and for manipulating file attributes such as access modes +and ownership. + +@item +@ref{Pipes and FIFOs}, which includes information on the basic interprocess +communication facilities. + +@item +@ref{Sockets}, which covers a more complicated interprocess communication +facility with support for networking. + +@item +@ref{Low-Level Terminal Interface}, which covers functions for changing +how input and output to terminals or other serial devices are processed. +@end itemize + + +@menu +* I/O Concepts:: Some basic information and terminology. +* File Names:: How to refer to a file. +@end menu + +@node I/O Concepts, File Names, , I/O Overview +@section Input/Output Concepts + +Before you can read or write the contents of a file, you must establish +a connection or communications channel to the file. This process is +called @dfn{opening} the file. You can open a file for reading, writing, +or both. +@cindex opening a file + +The connection to an open file is represented either as a stream or as a +file descriptor. You pass this as an argument to the functions that do +the actual read or write operations, to tell them which file to operate +on. Certain functions expect streams, and others are designed to +operate on file descriptors. + +When you have finished reading from or writing to the file, you can +terminate the connection by @dfn{closing} the file. Once you have +closed a stream or file descriptor, you cannot do any more input or +output operations on it. + +@menu +* Streams and File Descriptors:: The GNU C Library provides two ways + to access the contents of files. +* File Position:: The number of bytes from the + beginning of the file. 
+@end menu + +@node Streams and File Descriptors, File Position, , I/O Concepts +@subsection Streams and File Descriptors + +When you want to do input or output to a file, you have a choice of two +basic mechanisms for representing the connection between your program +and the file: file descriptors and streams. File descriptors are +represented as objects of type @code{int}, while streams are represented +as @code{FILE *} objects. + +File descriptors provide a primitive, low-level interface to input and +output operations. Both file descriptors and streams can represent a +connection to a device (such as a terminal), or a pipe or socket for +communicating with another process, as well as a normal file. But, if +you want to do control operations that are specific to a particular kind +of device, you must use a file descriptor; there are no facilities to +use streams in this way. You must also use file descriptors if your +program needs to do input or output in special modes, such as +nonblocking (or polled) input (@pxref{File Status Flags}). + +Streams provide a higher-level interface, layered on top of the +primitive file descriptor facilities. The stream interface treats all +kinds of files pretty much alike---the sole exception being the three +styles of buffering that you can choose (@pxref{Stream Buffering}). + +The main advantage of using the stream interface is that the set of +functions for performing actual input and output operations (as opposed +to control operations) on streams is much richer and more powerful than +the corresponding facilities for file descriptors. The file descriptor +interface provides only simple functions for transferring blocks of +characters, but the stream interface also provides powerful formatted +input and output functions (@code{printf} and @code{scanf}) as well as +functions for character- and line-oriented input and output. +@c !!! glibc has dprintf, which lets you do printf on an fd. 
+ +Since streams are implemented in terms of file descriptors, you can +extract the file descriptor from a stream and perform low-level +operations directly on the file descriptor. You can also initially open +a connection as a file descriptor and then make a stream associated with +that file descriptor. + +In general, you should stick with using streams rather than file +descriptors, unless there is some specific operation you want to do that +can only be done on a file descriptor. If you are a beginning +programmer and aren't sure what functions to use, we suggest that you +concentrate on the formatted input functions (@pxref{Formatted Input}) +and formatted output functions (@pxref{Formatted Output}). + +If you are concerned about portability of your programs to systems other +than GNU, you should also be aware that file descriptors are not as +portable as streams. You can expect any system running @w{ISO C} to +support streams, but @nongnusystems{} may not support file descriptors at +all, or may only implement a subset of the GNU functions that operate on +file descriptors. Most of the file descriptor functions in @theglibc{} +are included in the POSIX.1 standard, however. + +@node File Position, , Streams and File Descriptors, I/O Concepts +@subsection File Position + +One of the attributes of an open file is its @dfn{file position} that +keeps track of where in the file the next character is to be read or +written. On @gnusystems{}, and all POSIX.1 systems, the file position +is simply an integer representing the number of bytes from the beginning +of the file. + +The file position is normally set to the beginning of the file when it +is opened, and each time a character is read or written, the file +position is incremented. In other words, access to the file is normally +@dfn{sequential}. +@cindex file position +@cindex sequential-access files + +Ordinary files permit read or write operations at any position within +the file. 
Some other kinds of files may also permit this. Files which +do permit this are sometimes referred to as @dfn{random-access} files. +You can change the file position using the @code{fseek} function on a +stream (@pxref{File Positioning}) or the @code{lseek} function on a file +descriptor (@pxref{I/O Primitives}). If you try to change the file +position on a file that doesn't support random access, you get the +@code{ESPIPE} error. +@cindex random-access files + +Streams and descriptors that are opened for @dfn{append access} are +treated specially for output: output to such files is @emph{always} +appended sequentially to the @emph{end} of the file, regardless of the +file position. However, the file position is still used to control where in +the file reading is done. +@cindex append-access files + +If you think about it, you'll realize that several programs can read a +given file at the same time. In order for each program to be able to +read the file at its own pace, each program must have its own file +position, which is not affected by anything the other programs do. + +In fact, each opening of a file creates a separate file position. +Thus, if you open a file twice even in the same program, you get two +streams or descriptors with independent file positions. + +By contrast, if you open a descriptor and then duplicate it to get +another descriptor, these two descriptors share the same file position: +changing the file position of one descriptor will affect the other. + +@node File Names, , I/O Concepts, I/O Overview +@section File Names + +In order to open a connection to a file, or to perform other operations +such as deleting a file, you need some way to refer to the file. Nearly +all files have names that are strings---even files which are actually +devices such as tape drives or terminals. These strings are called +@dfn{file names}. You specify the file name to say which file you want +to open or operate on. 
+ +This section describes the conventions for file names and how the +operating system works with them. +@cindex file name + +@menu +* Directories:: Directories contain entries for files. +* File Name Resolution:: A file name specifies how to look up a file. +* File Name Errors:: Error conditions relating to file names. +* File Name Portability:: File name portability and syntax issues. +@end menu + + +@node Directories, File Name Resolution, , File Names +@subsection Directories + +In order to understand the syntax of file names, you need to understand +how the file system is organized into a hierarchy of directories. + +@cindex directory +@cindex link +@cindex directory entry +A @dfn{directory} is a file that contains information to associate other +files with names; these associations are called @dfn{links} or +@dfn{directory entries}. Sometimes, people speak of ``files in a +directory'', but in reality, a directory only contains pointers to +files, not the files themselves. + +@cindex file name component +The name of a file contained in a directory entry is called a @dfn{file +name component}. In general, a file name consists of a sequence of one +or more such components, separated by the slash character (@samp{/}). A +file name which is just one component names a file with respect to its +directory. A file name with multiple components names a directory, and +then a file in that directory, and so on. + +Some other documents, such as the POSIX standard, use the term +@dfn{pathname} for what we call a file name, and either @dfn{filename} +or @dfn{pathname component} for what this manual calls a file name +component. We don't use this terminology because a ``path'' is +something completely different (a list of directories to search), and we +think that ``pathname'' used for something else will confuse users. We +always use ``file name'' and ``file name component'' (or sometimes just +``component'', where the context is obvious) in GNU documentation. 
Some +macros use the POSIX terminology in their names, such as +@code{PATH_MAX}. These macros are defined by the POSIX standard, so we +cannot change their names. + +You can find more detailed information about operations on directories +in @ref{File System Interface}. + +@node File Name Resolution, File Name Errors, Directories, File Names +@subsection File Name Resolution + +A file name consists of file name components separated by slash +(@samp{/}) characters. On the systems that @theglibc{} supports, +multiple successive @samp{/} characters are equivalent to a single +@samp{/} character. + +@cindex file name resolution +The process of determining what file a file name refers to is called +@dfn{file name resolution}. This is performed by examining the +components that make up a file name in left-to-right order, and locating +each successive component in the directory named by the previous +component. Of course, each of the files that are referenced as +directories must actually exist, be directories instead of regular +files, and have the appropriate permissions to be accessible by the +process; otherwise the file name resolution fails. + +@cindex root directory +@cindex absolute file name +If a file name begins with a @samp{/}, the first component in the file +name is located in the @dfn{root directory} of the process (usually all +processes on the system have the same root directory). Such a file name +is called an @dfn{absolute file name}. +@c !!! xref here to chroot, if we ever document chroot. -rm + +@cindex relative file name +Otherwise, the first component in the file name is located in the +current working directory (@pxref{Working Directory}). This kind of +file name is called a @dfn{relative file name}. + +@cindex parent directory +The file name components @file{.} (``dot'') and @file{..} (``dot-dot'') +have special meanings. Every directory has entries for these file name +components. 
The file name component @file{.} refers to the directory +itself, while the file name component @file{..} refers to its +@dfn{parent directory} (the directory that contains the link for the +directory in question). As a special case, @file{..} in the root +directory refers to the root directory itself, since it has no parent; +thus @file{/..} is the same as @file{/}. + +Here are some examples of file names: + +@table @file +@item /a +The file named @file{a}, in the root directory. + +@item /a/b +The file named @file{b}, in the directory named @file{a} in the root directory. + +@item a +The file named @file{a}, in the current working directory. + +@item /a/./b +This is the same as @file{/a/b}. + +@item ./a +The file named @file{a}, in the current working directory. + +@item ../a +The file named @file{a}, in the parent directory of the current working +directory. +@end table + +@c An empty string may ``work'', but I think it's confusing to +@c try to describe it. It's not a useful thing for users to use--rms. +A file name that names a directory may optionally end in a @samp{/}. +You can specify a file name of @file{/} to refer to the root directory, +but the empty string is not a meaningful file name. If you want to +refer to the current working directory, use a file name of @file{.} or +@file{./}. + +Unlike some other operating systems, @gnusystems{} don't have any +built-in support for file types (or extensions) or file versions as part +of their file name syntax. Many programs and utilities use conventions +for file names---for example, files containing C source code usually +have names suffixed with @samp{.c}---but there is nothing in the file +system itself that enforces this kind of convention. 
+ +@node File Name Errors, File Name Portability, File Name Resolution, File Names +@subsection File Name Errors + +@cindex file name errors +@cindex usual file name errors + +Functions that accept file name arguments usually detect these +@code{errno} error conditions relating to the file name syntax or +trouble finding the named file. These errors are referred to throughout +this manual as the @dfn{usual file name errors}. + +@table @code +@item EACCES +The process does not have search permission for a directory component +of the file name. + +@item ENAMETOOLONG +This error is used when either the total length of a file name is +greater than @code{PATH_MAX}, or when an individual file name component +has a length greater than @code{NAME_MAX}. @xref{Limits for Files}. + +On @gnuhurdsystems{}, there is no imposed limit on overall file name +length, but some file systems may place limits on the length of a +component. + +@item ENOENT +This error is reported when a file referenced as a directory component +in the file name doesn't exist, or when a component is a symbolic link +whose target file does not exist. @xref{Symbolic Links}. + +@item ENOTDIR +A file that is referenced as a directory component in the file name +exists, but it isn't a directory. + +@item ELOOP +Too many symbolic links were resolved while trying to look up the file +name. The system has an arbitrary limit on the number of symbolic links +that may be resolved in looking up a single file name, as a primitive +way to detect loops. @xref{Symbolic Links}. +@end table + + +@node File Name Portability, , File Name Errors, File Names +@subsection Portability of File Names + +The rules for the syntax of file names discussed in @ref{File Names}, +are the rules normally used by @gnusystems{} and by other POSIX +systems. However, other operating systems may use other conventions. 
+ +There are two reasons why it can be important for you to be aware of +file name portability issues: + +@itemize @bullet +@item +If your program makes assumptions about file name syntax, or contains +embedded literal file name strings, it is more difficult to get it to +run under other operating systems that use different syntax conventions. + +@item +Even if you are not concerned about running your program on machines +that run other operating systems, it may still be possible to access +files that use different naming conventions. For example, you may be +able to access file systems on another computer running a different +operating system over a network, or read and write disks in formats used +by other operating systems. +@end itemize + +The @w{ISO C} standard says very little about file name syntax, only that +file names are strings. In addition to varying restrictions on the +length of file names and what characters can validly appear in a file +name, different operating systems use different conventions and syntax +for concepts such as structured directories and file types or +extensions. Some concepts such as file versions might be supported in +some operating systems and not by others. + +The POSIX.1 standard allows implementations to put additional +restrictions on file name syntax, concerning what characters are +permitted in file names and on the length of file name and file name +component strings. However, on @gnusystems{}, any character except +the null character is permitted in a file name string, and +on @gnuhurdsystems{} there are no limits on the length of file name +strings. 
diff --git a/REORG.TODO/manual/ipc.texi b/REORG.TODO/manual/ipc.texi new file mode 100644 index 0000000000..081b98fe29 --- /dev/null +++ b/REORG.TODO/manual/ipc.texi @@ -0,0 +1,116 @@ +@node Inter-Process Communication, Job Control, Processes, Top +@c %MENU% All about inter-process communication +@chapter Inter-Process Communication +@cindex ipc + +This chapter describes the @glibcadj{} inter-process communication primitives. + +@menu +* Semaphores:: Support for creating and managing semaphores +@end menu + +@node Semaphores +@section Semaphores + +@Theglibc{} implements the semaphore APIs as defined in POSIX and +System V. Semaphores can be used by multiple processes to coordinate shared +resources. The following is a complete list of the semaphore functions provided +by @theglibc{}. + +@c Need descriptions for all of these functions. + +@subsection System V Semaphores +@deftypefun int semctl (int @var{semid}, int @var{semnum}, int @var{cmd}); +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{/linux}}} +@c syscall(ipc) ok +@c +@c AC-unsafe because we need to translate the new kernel +@c semid_ds buf into the userspace layout. Cancellation +@c at that point results in an inconsistent userspace +@c semid_ds. 
+@end deftypefun + +@deftypefun int semget (key_t @var{key}, int @var{nsems}, int @var{semflg}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c syscall(ipc) ok +@end deftypefun + +@deftypefun int semop (int @var{semid}, struct sembuf *@var{sops}, size_t @var{nsops}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c syscall(ipc) ok +@end deftypefun + +@deftypefun int semtimedop (int @var{semid}, struct sembuf *@var{sops}, size_t @var{nsops}, const struct timespec *@var{timeout}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c syscall(ipc) ok +@end deftypefun + +@subsection POSIX Semaphores + +@deftypefun int sem_init (sem_t *@var{sem}, int @var{pshared}, unsigned int @var{value}); +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}} +@c Does not atomically update sem_t therefore AC-unsafe +@c because it can leave sem_t partially initialized. +@end deftypefun + +@deftypefun int sem_destroy (sem_t *@var{sem}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Function does nothing and is therefore always safe. +@end deftypefun + +@deftypefun sem_t *sem_open (const char *@var{name}, int @var{oflag}, ...); +@safety{@prelim{}@mtsafe{}@asunsafe{@asuinit{}}@acunsafe{@acuinit{}}} +@c pthread_once asuinit +@c +@c We are AC-unsafe because we use pthread_once to initialize +@c a global variable that holds the location of the mounted +@c shmfs on Linux. +@end deftypefun + +@deftypefun int sem_close (sem_t *@var{sem}); +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c lll_lock asulock aculock +@c twalk mtsrace{:root} +@c +@c We are AS-unsafe because we take a non-recursive lock. +@c We are AC-unsafe because several internal data structures +@c are not updated atomically. 
+@end deftypefun + +@deftypefun int sem_unlink (const char *@var{name}); +@safety{@prelim{}@mtsafe{}@asunsafe{@asuinit{}}@acunsafe{@acucorrupt{}}} +@c pthread_once asuinit acucorrupt aculock +@c mempcpy acucorrupt +@end deftypefun + +@deftypefun int sem_wait (sem_t *@var{sem}); +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}} +@c atomic_increment (nwaiters) acucorrupt +@c +@c Given the use of atomic operations this function seems +@c to be AS-safe. It is AC-unsafe because there is still +@c a window between atomic_decrement and the pthread_push +@c of the handler that undoes that operation. A cancellation +@c at that point would fail to remove the process from the +@c waiters count. +@end deftypefun + +@deftypefun int sem_timedwait (sem_t *@var{sem}, const struct timespec *@var{abstime}); +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}} +@c Same safety issues as sem_wait. +@end deftypefun + +@deftypefun int sem_trywait (sem_t *@var{sem}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c All atomic operations are safe in all contexts. +@end deftypefun + +@deftypefun int sem_post (sem_t *@var{sem}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Same safety as sem_trywait. +@end deftypefun + +@deftypefun int sem_getvalue (sem_t *@var{sem}, int *@var{sval}); +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Atomic write of a value is safe in all contexts. +@end deftypefun diff --git a/REORG.TODO/manual/job.texi b/REORG.TODO/manual/job.texi new file mode 100644 index 0000000000..72b55997d2 --- /dev/null +++ b/REORG.TODO/manual/job.texi @@ -0,0 +1,1319 @@ +@node Job Control, Name Service Switch, Inter-Process Communication, Top +@c %MENU% All about process groups and sessions +@chapter Job Control + +@cindex process groups +@cindex job control +@cindex job +@cindex session +@dfn{Job control} refers to the protocol for allowing a user to move +between multiple @dfn{process groups} (or @dfn{jobs}) within a single +@dfn{login session}. 
The job control facilities are set up so that +appropriate behavior for most programs happens automatically and they +need not do anything special about job control. So you can probably +ignore the material in this chapter unless you are writing a shell or +login program. + +You need to be familiar with concepts relating to process creation +(@pxref{Process Creation Concepts}) and signal handling (@pxref{Signal +Handling}) in order to understand the material presented in this +chapter. + +@menu +* Concepts of Job Control:: Jobs can be controlled by a shell. +* Job Control is Optional:: Not all POSIX systems support job control. +* Controlling Terminal:: How a process gets its controlling terminal. +* Access to the Terminal:: How processes share the controlling terminal. +* Orphaned Process Groups:: Jobs left after the user logs out. +* Implementing a Shell:: What a shell must do to implement job control. +* Functions for Job Control:: Functions to control process groups. +@end menu + +@node Concepts of Job Control, Job Control is Optional, , Job Control +@section Concepts of Job Control + +@cindex shell +The fundamental purpose of an interactive shell is to read +commands from the user's terminal and create processes to execute the +programs specified by those commands. It can do this using the +@code{fork} (@pxref{Creating a Process}) and @code{exec} +(@pxref{Executing a File}) functions. + +A single command may run just one process---but often one command uses +several processes. If you use the @samp{|} operator in a shell command, +you explicitly request several programs in their own processes. But +even if you run just one program, it can use multiple processes +internally. For example, a single compilation command such as @samp{cc +-c foo.c} typically uses four processes (though normally only two at any +given time). If you run @code{make}, its job is to run other programs +in separate processes. 
+ +The processes belonging to a single command are called a @dfn{process +group} or @dfn{job}. This is so that you can operate on all of them at +once. For example, typing @kbd{C-c} sends the signal @code{SIGINT} to +terminate all the processes in the foreground process group. + +@cindex session +A @dfn{session} is a larger group of processes. Normally all the +processes that stem from a single login belong to the same session. + +Every process belongs to a process group. When a process is created, it +becomes a member of the same process group and session as its parent +process. You can put it in another process group using the +@code{setpgid} function, provided the process group belongs to the same +session. + +@cindex session leader +The only way to put a process in a different session is to make it the +initial process of a new session, or a @dfn{session leader}, using the +@code{setsid} function. This also puts the session leader into a new +process group, and you can't move it out of that process group again. + +Usually, new sessions are created by the system login program, and the +session leader is the process running the user's login shell. + +@cindex controlling terminal +A shell that supports job control must arrange to control which job can +use the terminal at any time. Otherwise there might be multiple jobs +trying to read from the terminal at once, and confusion about which +process should receive the input typed by the user. To prevent this, +the shell must cooperate with the terminal driver using the protocol +described in this chapter. + +@cindex foreground job +@cindex background job +The shell can give unlimited access to the controlling terminal to only +one process group at a time. This is called the @dfn{foreground job} on +that controlling terminal. Other process groups managed by the shell +that are executing without such access to the terminal are called +@dfn{background jobs}. 
+ +@cindex stopped job +If a background job needs to read from its controlling +terminal, it is @dfn{stopped} by the terminal driver; if the +@code{TOSTOP} mode is set, likewise for writing. The user can stop +a foreground job by typing the SUSP character (@pxref{Special +Characters}) and a program can stop any job by sending it a +@code{SIGSTOP} signal. It's the responsibility of the shell to notice +when jobs stop, to notify the user about them, and to provide mechanisms +for allowing the user to interactively continue stopped jobs and switch +jobs between foreground and background. + +@xref{Access to the Terminal}, for more information about I/O to the +controlling terminal. + +@node Job Control is Optional, Controlling Terminal, Concepts of Job Control , Job Control +@section Job Control is Optional +@cindex job control is optional + +Not all operating systems support job control. @gnusystems{} do +support job control, but if you are using @theglibc{} on some other +system, that system may not support job control itself. + +You can use the @code{_POSIX_JOB_CONTROL} macro to test at compile-time +whether the system supports job control. @xref{System Options}. + +If job control is not supported, then there can be only one process +group per session, which behaves as if it were always in the foreground. +The functions for creating additional process groups simply fail with +the error code @code{ENOSYS}. + +The macros naming the various job control signals (@pxref{Job Control +Signals}) are defined even if job control is not supported. However, +the system never generates these signals, and attempts to send a job +control signal or examine or specify their actions report errors or do +nothing. + + +@node Controlling Terminal, Access to the Terminal, Job Control is Optional, Job Control +@section Controlling Terminal of a Process + +One of the attributes of a process is its controlling terminal. 
Child +processes created with @code{fork} inherit the controlling terminal from +their parent process. In this way, all the processes in a session +inherit the controlling terminal from the session leader. A session +leader that has control of a terminal is called the @dfn{controlling +process} of that terminal. + +@cindex controlling process +You generally do not need to worry about the exact mechanism used to +allocate a controlling terminal to a session, since it is done for you +by the system when you log in. +@c ??? How does GNU system let a process get a ctl terminal. + +An individual process disconnects from its controlling terminal when it +calls @code{setsid} to become the leader of a new session. +@xref{Process Group Functions}. + +@c !!! explain how it gets a new one (by opening any terminal) +@c ??? How you get a controlling terminal is system-dependent. +@c We should document how this will work in the GNU system when it is decided. +@c What Unix does is not clean and I don't think GNU should use that. + +@node Access to the Terminal, Orphaned Process Groups, Controlling Terminal, Job Control +@section Access to the Controlling Terminal +@cindex controlling terminal, access to + +Processes in the foreground job of a controlling terminal have +unrestricted access to that terminal; background processes do not. This +section describes in more detail what happens when a process in a +background job tries to access its controlling terminal. + +@cindex @code{SIGTTIN}, from background job +When a process in a background job tries to read from its controlling +terminal, the process group is usually sent a @code{SIGTTIN} signal. +This normally causes all of the processes in that group to stop (unless +they handle the signal and don't stop themselves). However, if the +reading process is ignoring or blocking this signal, then @code{read} +fails with an @code{EIO} error instead. 
+ +@cindex @code{SIGTTOU}, from background job +Similarly, when a process in a background job tries to write to its +controlling terminal, the default behavior is to send a @code{SIGTTOU} +signal to the process group. However, the behavior is modified by the +@code{TOSTOP} bit of the local modes flags (@pxref{Local Modes}). If +this bit is not set (which is the default), then writing to the +controlling terminal is always permitted without sending a signal. +Writing is also permitted if the @code{SIGTTOU} signal is being ignored +or blocked by the writing process. + +Most other terminal operations that a program can do are treated as +reading or as writing. (The description of each operation should say +which.) + +For more information about the primitive @code{read} and @code{write} +functions, see @ref{I/O Primitives}. + + +@node Orphaned Process Groups, Implementing a Shell, Access to the Terminal, Job Control +@section Orphaned Process Groups +@cindex orphaned process group + +When a controlling process terminates, its terminal becomes free and a +new session can be established on it. (In fact, another user could log +in on the terminal.) This could cause a problem if any processes from +the old session are still trying to use that terminal. + +To prevent problems, process groups that continue running even after the +session leader has terminated are marked as @dfn{orphaned process +groups}. + +When a process group becomes an orphan, its processes are sent a +@code{SIGHUP} signal. Ordinarily, this causes the processes to +terminate. However, if a program ignores this signal or establishes a +handler for it (@pxref{Signal Handling}), it can continue running in +the orphaned process group even after its controlling process terminates; +but it still cannot access the terminal any more. 
+ +@node Implementing a Shell, Functions for Job Control, Orphaned Process Groups, Job Control +@section Implementing a Job Control Shell + +This section describes what a shell must do to implement job control, by +presenting an extensive sample program to illustrate the concepts +involved. + +@iftex +@itemize @bullet +@item +@ref{Data Structures}, introduces the example and presents +its primary data structures. + +@item +@ref{Initializing the Shell}, discusses actions which the shell must +perform to prepare for job control. + +@item +@ref{Launching Jobs}, includes information about how to create jobs +to execute commands. + +@item +@ref{Foreground and Background}, discusses what the shell should +do differently when launching a job in the foreground as opposed to +a background job. + +@item +@ref{Stopped and Terminated Jobs}, discusses reporting of job status +back to the shell. + +@item +@ref{Continuing Stopped Jobs}, tells you how to continue jobs that +have been stopped. + +@item +@ref{Missing Pieces}, discusses other parts of the shell. +@end itemize +@end iftex + +@menu +* Data Structures:: Introduction to the sample shell. +* Initializing the Shell:: What the shell must do to take + responsibility for job control. +* Launching Jobs:: Creating jobs to execute commands. +* Foreground and Background:: Putting a job in foreground or background. +* Stopped and Terminated Jobs:: Reporting job status. +* Continuing Stopped Jobs:: How to continue a stopped job in + the foreground or background. +* Missing Pieces:: Other parts of the shell. +@end menu + +@node Data Structures, Initializing the Shell, , Implementing a Shell +@subsection Data Structures for the Shell + +All of the program examples included in this chapter are part of +a simple shell program. This section presents data structures +and utility functions which are used throughout the example. + +The sample shell deals mainly with two data structures. 
The +@code{job} type contains information about a job, which is a +set of subprocesses linked together with pipes. The @code{process} type +holds information about a single subprocess. Here are the relevant +data structure declarations: + +@smallexample +@group +/* @r{A process is a single process.} */ +typedef struct process +@{ + struct process *next; /* @r{next process in pipeline} */ + char **argv; /* @r{for exec} */ + pid_t pid; /* @r{process ID} */ + char completed; /* @r{true if process has completed} */ + char stopped; /* @r{true if process has stopped} */ + int status; /* @r{reported status value} */ +@} process; +@end group + +@group +/* @r{A job is a pipeline of processes.} */ +typedef struct job +@{ + struct job *next; /* @r{next active job} */ + char *command; /* @r{command line, used for messages} */ + process *first_process; /* @r{list of processes in this job} */ + pid_t pgid; /* @r{process group ID} */ + char notified; /* @r{true if user told about stopped job} */ + struct termios tmodes; /* @r{saved terminal modes} */ + int stdin, stdout, stderr; /* @r{standard i/o channels} */ +@} job; + +/* @r{The active jobs are linked into a list. This is its head.} */ +job *first_job = NULL; +@end group +@end smallexample + +Here are some utility functions that are used for operating on @code{job} +objects. 
+ +@smallexample +@group +/* @r{Find the active job with the indicated @var{pgid}.} */ +job * +find_job (pid_t pgid) +@{ + job *j; + + for (j = first_job; j; j = j->next) + if (j->pgid == pgid) + return j; + return NULL; +@} +@end group + +@group +/* @r{Return true if all processes in the job have stopped or completed.} */ +int +job_is_stopped (job *j) +@{ + process *p; + + for (p = j->first_process; p; p = p->next) + if (!p->completed && !p->stopped) + return 0; + return 1; +@} +@end group + +@group +/* @r{Return true if all processes in the job have completed.} */ +int +job_is_completed (job *j) +@{ + process *p; + + for (p = j->first_process; p; p = p->next) + if (!p->completed) + return 0; + return 1; +@} +@end group +@end smallexample + + +@node Initializing the Shell, Launching Jobs, Data Structures, Implementing a Shell +@subsection Initializing the Shell +@cindex job control, enabling +@cindex subshell + +When a shell program that normally performs job control is started, it +has to be careful in case it has been invoked from another shell that is +already doing its own job control. + +A subshell that runs interactively has to ensure that it has been placed +in the foreground by its parent shell before it can enable job control +itself. It does this by getting its initial process group ID with the +@code{getpgrp} function, and comparing it to the process group ID of the +current foreground job associated with its controlling terminal (which +can be retrieved using the @code{tcgetpgrp} function). + +If the subshell is not running as a foreground job, it must stop itself +by sending a @code{SIGTTIN} signal to its own process group. It may not +arbitrarily put itself into the foreground; it must wait for the user to +tell the parent shell to do this. If the subshell is continued again, +it should repeat the check and stop itself again if it is still not in +the foreground. 
+ +@cindex job control, enabling +Once the subshell has been placed into the foreground by its parent +shell, it can enable its own job control. It does this by calling +@code{setpgid} to put itself into its own process group, and then +calling @code{tcsetpgrp} to place this process group into the +foreground. + +When a shell enables job control, it should set itself to ignore all the +job control stop signals so that it doesn't accidentally stop itself. +You can do this by setting the action for all the stop signals to +@code{SIG_IGN}. + +A subshell that runs non-interactively cannot and should not support job +control. It must leave all processes it creates in the same process +group as the shell itself; this allows the non-interactive shell and its +child processes to be treated as a single job by the parent shell. This +is easy to do---just don't use any of the job control primitives---but +you must remember to make the shell do it. + + +Here is the initialization code for the sample shell that shows how to +do all of this. 
+ +@smallexample +/* @r{Keep track of attributes of the shell.} */ + +#include <sys/types.h> +#include <termios.h> +#include <unistd.h> + +pid_t shell_pgid; +struct termios shell_tmodes; +int shell_terminal; +int shell_is_interactive; + + +/* @r{Make sure the shell is running interactively as the foreground job} + @r{before proceeding.} */ + +void +init_shell () +@{ + + /* @r{See if we are running interactively.} */ + shell_terminal = STDIN_FILENO; + shell_is_interactive = isatty (shell_terminal); + + if (shell_is_interactive) + @{ + /* @r{Loop until we are in the foreground.} */ + while (tcgetpgrp (shell_terminal) != (shell_pgid = getpgrp ())) + kill (- shell_pgid, SIGTTIN); + + /* @r{Ignore interactive and job-control signals.} */ + signal (SIGINT, SIG_IGN); + signal (SIGQUIT, SIG_IGN); + signal (SIGTSTP, SIG_IGN); + signal (SIGTTIN, SIG_IGN); + signal (SIGTTOU, SIG_IGN); + signal (SIGCHLD, SIG_IGN); + + /* @r{Put ourselves in our own process group.} */ + shell_pgid = getpid (); + if (setpgid (shell_pgid, shell_pgid) < 0) + @{ + perror ("Couldn't put the shell in its own process group"); + exit (1); + @} + + /* @r{Grab control of the terminal.} */ + tcsetpgrp (shell_terminal, shell_pgid); + + /* @r{Save default terminal attributes for shell.} */ + tcgetattr (shell_terminal, &shell_tmodes); + @} +@} +@end smallexample + + +@node Launching Jobs, Foreground and Background, Initializing the Shell, Implementing a Shell +@subsection Launching Jobs +@cindex launching jobs + +Once the shell has taken responsibility for performing job control on +its controlling terminal, it can launch jobs in response to commands +typed by the user. + +To create the processes in a process group, you use the same @code{fork} +and @code{exec} functions described in @ref{Process Creation Concepts}. +Since there are multiple child processes involved, though, things are a +little more complicated and you must be careful to do things in the +right order. 
Otherwise, nasty race conditions can result. + +You have two choices for how to structure the tree of parent-child +relationships among the processes. You can either make all the +processes in the process group be children of the shell process, or you +can make one process in the group be the ancestor of all the other processes +in that group. The sample shell program presented in this chapter uses +the first approach because it makes bookkeeping somewhat simpler. + +@cindex process group leader +@cindex process group ID +As each process is forked, it should put itself in the new process group +by calling @code{setpgid}; see @ref{Process Group Functions}. The first +process in the new group becomes its @dfn{process group leader}, and its +process ID becomes the @dfn{process group ID} for the group. + +@cindex race conditions, relating to job control +The shell should also call @code{setpgid} to put each of its child +processes into the new process group. This is because there is a +potential timing problem: each child process must be put in the process +group before it begins executing a new program, and the shell depends on +having all the child processes in the group before it continues +executing. If both the child processes and the shell call +@code{setpgid}, this ensures that the right things happen no matter which +process gets to it first. + +If the job is being launched as a foreground job, the new process group +also needs to be put into the foreground on the controlling terminal +using @code{tcsetpgrp}. Again, this should be done by the shell as well +as by each of its child processes, to avoid race conditions. + +The next thing each child process should do is to reset its signal +actions. + +During initialization, the shell process set itself to ignore job +control signals; see @ref{Initializing the Shell}. As a result, any child +processes it creates also ignore these signals by inheritance. 
This is +definitely undesirable, so each child process should explicitly set the +actions for these signals back to @code{SIG_DFL} just after it is forked. + +Since shells follow this convention, applications can assume that they +inherit the correct handling of these signals from the parent process. +But every application has a responsibility not to mess up the handling +of stop signals. Applications that disable the normal interpretation of +the SUSP character should provide some other mechanism for the user to +stop the job. When the user invokes this mechanism, the program should +send a @code{SIGTSTP} signal to the process group of the process, not +just to the process itself. @xref{Signaling Another Process}. + +Finally, each child process should call @code{exec} in the normal way. +This is also the point at which redirection of the standard input and +output channels should be handled. @xref{Duplicating Descriptors}, +for an explanation of how to do this. + +Here is the function from the sample shell program that is responsible +for launching a program. The function is executed by each child process +immediately after it has been forked by the shell, and never returns. 
+ +@smallexample +void +launch_process (process *p, pid_t pgid, + int infile, int outfile, int errfile, + int foreground) +@{ + pid_t pid; + + if (shell_is_interactive) + @{ + /* @r{Put the process into the process group and give the process group} + @r{the terminal, if appropriate.} + @r{This has to be done both by the shell and in the individual} + @r{child processes because of potential race conditions.} */ + pid = getpid (); + if (pgid == 0) pgid = pid; + setpgid (pid, pgid); + if (foreground) + tcsetpgrp (shell_terminal, pgid); + + /* @r{Set the handling for job control signals back to the default.} */ + signal (SIGINT, SIG_DFL); + signal (SIGQUIT, SIG_DFL); + signal (SIGTSTP, SIG_DFL); + signal (SIGTTIN, SIG_DFL); + signal (SIGTTOU, SIG_DFL); + signal (SIGCHLD, SIG_DFL); + @} + + /* @r{Set the standard input/output channels of the new process.} */ + if (infile != STDIN_FILENO) + @{ + dup2 (infile, STDIN_FILENO); + close (infile); + @} + if (outfile != STDOUT_FILENO) + @{ + dup2 (outfile, STDOUT_FILENO); + close (outfile); + @} + if (errfile != STDERR_FILENO) + @{ + dup2 (errfile, STDERR_FILENO); + close (errfile); + @} + + /* @r{Exec the new process. Make sure we exit.} */ + execvp (p->argv[0], p->argv); + perror ("execvp"); + exit (1); +@} +@end smallexample + +If the shell is not running interactively, this function does not do +anything with process groups or signals. Remember that a shell not +performing job control must keep all of its subprocesses in the same +process group as the shell itself. + +Next, here is the function that actually launches a complete job. +After creating the child processes, this function calls some other +functions to put the newly created job into the foreground or background; +these are discussed in @ref{Foreground and Background}. 
+ +@smallexample +void +launch_job (job *j, int foreground) +@{ + process *p; + pid_t pid; + int mypipe[2], infile, outfile; + + infile = j->stdin; + for (p = j->first_process; p; p = p->next) + @{ + /* @r{Set up pipes, if necessary.} */ + if (p->next) + @{ + if (pipe (mypipe) < 0) + @{ + perror ("pipe"); + exit (1); + @} + outfile = mypipe[1]; + @} + else + outfile = j->stdout; + + /* @r{Fork the child processes.} */ + pid = fork (); + if (pid == 0) + /* @r{This is the child process.} */ + launch_process (p, j->pgid, infile, + outfile, j->stderr, foreground); + else if (pid < 0) + @{ + /* @r{The fork failed.} */ + perror ("fork"); + exit (1); + @} + else + @{ + /* @r{This is the parent process.} */ + p->pid = pid; + if (shell_is_interactive) + @{ + if (!j->pgid) + j->pgid = pid; + setpgid (pid, j->pgid); + @} + @} + + /* @r{Clean up after pipes.} */ + if (infile != j->stdin) + close (infile); + if (outfile != j->stdout) + close (outfile); + infile = mypipe[0]; + @} + + format_job_info (j, "launched"); + + if (!shell_is_interactive) + wait_for_job (j); + else if (foreground) + put_job_in_foreground (j, 0); + else + put_job_in_background (j, 0); +@} +@end smallexample + + +@node Foreground and Background, Stopped and Terminated Jobs, Launching Jobs, Implementing a Shell +@subsection Foreground and Background + +Now let's consider what actions must be taken by the shell when it +launches a job into the foreground, and how this differs from what +must be done when a background job is launched. + +@cindex foreground job, launching +When a foreground job is launched, the shell must first give it access +to the controlling terminal by calling @code{tcsetpgrp}. Then, the +shell should wait for processes in that process group to terminate or +stop. This is discussed in more detail in @ref{Stopped and Terminated +Jobs}. 
+ +When all of the processes in the group have either completed or stopped, +the shell should regain control of the terminal for its own process +group by calling @code{tcsetpgrp} again. Since stop signals caused by +I/O from a background process or a SUSP character typed by the user +are sent to the process group, normally all the processes in the job +stop together. + +The foreground job may have left the terminal in a strange state, so the +shell should restore its own saved terminal modes before continuing. In +case the job is merely stopped, the shell should first save the current +terminal modes so that it can restore them later if the job is +continued. The functions for dealing with terminal modes are +@code{tcgetattr} and @code{tcsetattr}; these are described in +@ref{Terminal Modes}. + +Here is the sample shell's function for doing all of this. + +@smallexample +@group +/* @r{Put job @var{j} in the foreground. If @var{cont} is nonzero,} + @r{restore the saved terminal modes and send the process group a} + @r{@code{SIGCONT} signal to wake it up before we block.} */ + +void +put_job_in_foreground (job *j, int cont) +@{ + /* @r{Put the job into the foreground.} */ + tcsetpgrp (shell_terminal, j->pgid); +@end group + +@group + /* @r{Send the job a continue signal, if necessary.} */ + if (cont) + @{ + tcsetattr (shell_terminal, TCSADRAIN, &j->tmodes); + if (kill (- j->pgid, SIGCONT) < 0) + perror ("kill (SIGCONT)"); + @} +@end group + + /* @r{Wait for it to report.} */ + wait_for_job (j); + + /* @r{Put the shell back in the foreground.} */ + tcsetpgrp (shell_terminal, shell_pgid); + +@group + /* @r{Restore the shell's terminal modes.} */ + tcgetattr (shell_terminal, &j->tmodes); + tcsetattr (shell_terminal, TCSADRAIN, &shell_tmodes); +@} +@end group +@end smallexample + +@cindex background job, launching +If the process group is launched as a background job, the shell should +remain in the foreground itself and continue to read commands from +the terminal. 
+ +In the sample shell, there is not much that needs to be done to put +a job into the background. Here is the function it uses: + +@smallexample +/* @r{Put a job in the background. If the cont argument is true, send} + @r{the process group a @code{SIGCONT} signal to wake it up.} */ + +void +put_job_in_background (job *j, int cont) +@{ + /* @r{Send the job a continue signal, if necessary.} */ + if (cont) + if (kill (-j->pgid, SIGCONT) < 0) + perror ("kill (SIGCONT)"); +@} +@end smallexample + + +@node Stopped and Terminated Jobs, Continuing Stopped Jobs, Foreground and Background, Implementing a Shell +@subsection Stopped and Terminated Jobs + +@cindex stopped jobs, detecting +@cindex terminated jobs, detecting +When a foreground process is launched, the shell must block until all of +the processes in that job have either terminated or stopped. It can do +this by calling the @code{waitpid} function; see @ref{Process +Completion}. Use the @code{WUNTRACED} option so that status is reported +for processes that stop as well as processes that terminate. + +The shell must also check on the status of background jobs so that it +can report terminated and stopped jobs to the user; this can be done by +calling @code{waitpid} with the @code{WNOHANG} option. A good place to +put such a check for terminated and stopped jobs is just before +prompting for a new command. + +@cindex @code{SIGCHLD}, handling of +The shell can also receive asynchronous notification that there is +status information available for a child process by establishing a +handler for @code{SIGCHLD} signals. @xref{Signal Handling}. + +In the sample shell program, the @code{SIGCHLD} signal is normally +ignored. This is to avoid reentrancy problems involving the global data +structures the shell manipulates. But at specific times when the shell +is not using these data structures---such as when it is waiting for +input on the terminal---it makes sense to enable a handler for +@code{SIGCHLD}.
The same function that is used to do the synchronous +status checks (@code{do_job_notification}, in this case) can also be +called from within this handler. + +Here are the parts of the sample shell program that deal with checking +the status of jobs and reporting the information to the user. + +@smallexample +@group +/* @r{Store the status of the process @var{pid} that was returned by waitpid.} + @r{Return 0 if all went well, nonzero otherwise.} */ + +int +mark_process_status (pid_t pid, int status) +@{ + job *j; + process *p; +@end group + +@group + if (pid > 0) + @{ + /* @r{Update the record for the process.} */ + for (j = first_job; j; j = j->next) + for (p = j->first_process; p; p = p->next) + if (p->pid == pid) + @{ + p->status = status; + if (WIFSTOPPED (status)) + p->stopped = 1; + else + @{ + p->completed = 1; + if (WIFSIGNALED (status)) + fprintf (stderr, "%d: Terminated by signal %d.\n", + (int) pid, WTERMSIG (p->status)); + @} + return 0; + @} + fprintf (stderr, "No child process %d.\n", pid); + return -1; + @} +@end group +@group + else if (pid == 0 || errno == ECHILD) + /* @r{No processes ready to report.} */ + return -1; + else @{ + /* @r{Other weird errors.} */ + perror ("waitpid"); + return -1; + @} +@} +@end group + +@group +/* @r{Check for processes that have status information available,} + @r{without blocking.} */ + +void +update_status (void) +@{ + int status; + pid_t pid; + + do + pid = waitpid (WAIT_ANY, &status, WUNTRACED|WNOHANG); + while (!mark_process_status (pid, status)); +@} +@end group + +@group +/* @r{Check for processes that have status information available,} + @r{blocking until all processes in the given job have reported.} */ + +void +wait_for_job (job *j) +@{ + int status; + pid_t pid; + + do + pid = waitpid (WAIT_ANY, &status, WUNTRACED); + while (!mark_process_status (pid, status) + && !job_is_stopped (j) + && !job_is_completed (j)); +@} +@end group + +@group +/* @r{Format information about job status for the user to look 
at.} */ + +void +format_job_info (job *j, const char *status) +@{ + fprintf (stderr, "%ld (%s): %s\n", (long)j->pgid, status, j->command); +@} +@end group + +@group +/* @r{Notify the user about stopped or terminated jobs.} + @r{Delete terminated jobs from the active job list.} */ + +void +do_job_notification (void) +@{ + job *j, *jlast, *jnext; + process *p; + + /* @r{Update status information for child processes.} */ + update_status (); + + jlast = NULL; + for (j = first_job; j; j = jnext) + @{ + jnext = j->next; + + /* @r{If all processes have completed, tell the user the job has} + @r{completed and delete it from the list of active jobs.} */ + if (job_is_completed (j)) @{ + format_job_info (j, "completed"); + if (jlast) + jlast->next = jnext; + else + first_job = jnext; + free_job (j); + @} + + /* @r{Notify the user about stopped jobs,} + @r{marking them so that we won't do this more than once.} */ + else if (job_is_stopped (j) && !j->notified) @{ + format_job_info (j, "stopped"); + j->notified = 1; + jlast = j; + @} + + /* @r{Don't say anything about jobs that are still running.} */ + else + jlast = j; + @} +@} +@end group +@end smallexample + +@node Continuing Stopped Jobs, Missing Pieces, Stopped and Terminated Jobs, Implementing a Shell +@subsection Continuing Stopped Jobs + +@cindex stopped jobs, continuing +The shell can continue a stopped job by sending a @code{SIGCONT} signal +to its process group. If the job is being continued in the foreground, +the shell should first invoke @code{tcsetpgrp} to give the job access to +the terminal, and restore the saved terminal settings. After continuing +a job in the foreground, the shell should wait for the job to stop or +complete, as if the job had just been launched in the foreground. + +The sample shell program handles both newly created and continued jobs +with the same pair of functions, @w{@code{put_job_in_foreground}} and +@w{@code{put_job_in_background}}. 
The definitions of these functions +were given in @ref{Foreground and Background}. When continuing a +stopped job, a nonzero value is passed as the @var{cont} argument to +ensure that the @code{SIGCONT} signal is sent and the terminal modes +reset, as appropriate. + +This leaves only a function for updating the shell's internal bookkeeping +about the job being continued: + +@smallexample +@group +/* @r{Mark a stopped job J as being running again.} */ + +void +mark_job_as_running (job *j) +@{ + process *p; + + for (p = j->first_process; p; p = p->next) + p->stopped = 0; + j->notified = 0; +@} +@end group + +@group +/* @r{Continue the job J.} */ + +void +continue_job (job *j, int foreground) +@{ + mark_job_as_running (j); + if (foreground) + put_job_in_foreground (j, 1); + else + put_job_in_background (j, 1); +@} +@end group +@end smallexample + +@node Missing Pieces, , Continuing Stopped Jobs, Implementing a Shell +@subsection The Missing Pieces + +The code extracts for the sample shell included in this chapter are only +a part of the entire shell program. In particular, nothing at all has +been said about how @code{job} and @code{process} data structures are +allocated and initialized. + +Most real shells provide a complex user interface that has support for +a command language; variables; abbreviations, substitutions, and pattern +matching on file names; and the like. All of this is far too complicated +to explain here! Instead, we have concentrated on showing how to +implement the core process creation and job control functions that can +be called from such a shell. + +Here is a table summarizing the major entry points we have presented: + +@table @code +@item void init_shell (void) +Initialize the shell's internal state. @xref{Initializing the +Shell}. + +@item void launch_job (job *@var{j}, int @var{foreground}) +Launch the job @var{j} as either a foreground or background job. +@xref{Launching Jobs}.
+ +@item void do_job_notification (void) +Check for and report any jobs that have terminated or stopped. Can be +called synchronously or within a handler for @code{SIGCHLD} signals. +@xref{Stopped and Terminated Jobs}. + +@item void continue_job (job *@var{j}, int @var{foreground}) +Continue the job @var{j}. @xref{Continuing Stopped Jobs}. +@end table + +Of course, a real shell would also want to provide other functions for +managing jobs. For example, it would be useful to have commands to list +all active jobs or to send a signal (such as @code{SIGKILL}) to a job. + + +@node Functions for Job Control, , Implementing a Shell, Job Control +@section Functions for Job Control +@cindex process group functions +@cindex job control functions + +This section contains detailed descriptions of the functions relating +to job control. + +@menu +* Identifying the Terminal:: Determining the controlling terminal's name. +* Process Group Functions:: Functions for manipulating process groups. +* Terminal Access Functions:: Functions for controlling terminal access. +@end menu + + +@node Identifying the Terminal, Process Group Functions, , Functions for Job Control +@subsection Identifying the Controlling Terminal +@cindex controlling terminal, determining + +You can use the @code{ctermid} function to get a file name that you can +use to open the controlling terminal. In @theglibc{}, it returns +the same string all the time: @code{"/dev/tty"}. That is a special +``magic'' file name that refers to the controlling terminal of the +current process (if it has one). To find the name of the specific +terminal device, use @code{ttyname}; @pxref{Is It a Terminal}. + +The function @code{ctermid} is declared in the header file +@file{stdio.h}. 
+@pindex stdio.h + +@comment stdio.h +@comment POSIX.1 +@deftypefun {char *} ctermid (char *@var{string}) +@safety{@prelim{}@mtsafe{@mtsposix{/!string}}@assafe{}@acsafe{}} +@c This function is a stub by default; the actual implementation, for +@c posix systems, returns a pointer to a string literal if passed a NULL +@c string. It's not clear we want to commit to being MT-Safe in the +@c !string case, so maybe add mtasurace{:ctermid/!string} when we take +@c prelim out, to make room for using a static buffer in the future. +The @code{ctermid} function returns a string containing the file name of +the controlling terminal for the current process. If @var{string} is +not a null pointer, it should be an array that can hold at least +@code{L_ctermid} characters; the string is returned in this array. +Otherwise, a pointer to a string in a static area is returned, which +might get overwritten on subsequent calls to this function. + +An empty string is returned if the file name cannot be determined for +any reason. Even if a file name is returned, access to the file it +represents is not guaranteed. +@end deftypefun + +@comment stdio.h +@comment POSIX.1 +@deftypevr Macro int L_ctermid +The value of this macro is an integer constant expression that +represents the size of a string large enough to hold the file name +returned by @code{ctermid}. +@end deftypevr + +See also the @code{isatty} and @code{ttyname} functions, in +@ref{Is It a Terminal}. + + +@node Process Group Functions, Terminal Access Functions, Identifying the Terminal, Functions for Job Control +@subsection Process Group Functions + +Here are descriptions of the functions for manipulating process groups. +Your program should include the header files @file{sys/types.h} and +@file{unistd.h} to use these functions. 
+@pindex unistd.h +@pindex sys/types.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t setsid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is usually a direct syscall, but if a syscall is not available, +@c we use a stub, or Hurd- and BSD-specific implementations. The former +@c uses a mutex and a hurd critical section, and the latter issues a few +@c syscalls, so both seem safe, the locking on Hurd is safe because of +@c the critical section. +The @code{setsid} function creates a new session. The calling process +becomes the session leader, and is put in a new process group whose +process group ID is the same as the process ID of that process. There +are initially no other processes in the new process group, and no other +process groups in the new session. + +This function also makes the calling process have no controlling terminal. + +The @code{setsid} function returns the new process group ID of the +calling process if successful. A return value of @code{-1} indicates an +error. The following @code{errno} error conditions are defined for this +function: + +@table @code +@item EPERM +The calling process is already a process group leader, or there is +already another process group around that has the same process group ID. +@end table +@end deftypefun + +@comment unistd.h +@comment SVID +@deftypefun pid_t getsid (pid_t @var{pid}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Stub or direct syscall, except on hurd, where it is equally safe. + +The @code{getsid} function returns the process group ID of the session +leader of the specified process. If a @var{pid} is @code{0}, the +process group ID of the session leader of the current process is +returned. + +In case of error @code{-1} is returned and @code{errno} is set. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item ESRCH +There is no process with the given process ID @var{pid}. 
+@item EPERM +The calling process and the process specified by @var{pid} are in +different sessions, and the implementation doesn't allow access to the +process group ID of the session leader of the process with ID @var{pid} +from the calling process. +@end table +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t getpgrp (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getpgrp} function returns the process group ID of +the calling process. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t getpgid (pid_t @var{pid}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Stub or direct syscall, except on hurd, where it is equally safe. + +The @code{getpgid} function +returns the process group ID of the process @var{pid}. You can supply a +value of @code{0} for the @var{pid} argument to get information about +the calling process. + +In case of error @code{-1} is returned and @code{errno} is set. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item ESRCH +There is no process with the given process ID @var{pid}. +@item EPERM +The calling process and the process specified by @var{pid} are in +different sessions, and the implementation doesn't allow access to the +process group ID of the process with ID @var{pid} from the calling +process. +@end table +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int setpgid (pid_t @var{pid}, pid_t @var{pgid}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Stub or direct syscall, except on hurd, where it is equally safe. +The @code{setpgid} function puts the process @var{pid} into the process +group @var{pgid}. As a special case, either @var{pid} or @var{pgid} can +be zero to indicate the process ID of the calling process. + +This function fails on a system that does not support job control. +@xref{Job Control is Optional}, for more information. + +If the operation is successful, @code{setpgid} returns zero.
Otherwise +it returns @code{-1}. The following @code{errno} error conditions are +defined for this function: + +@table @code +@item EACCES +The child process named by @var{pid} has executed an @code{exec} +function since it was forked. + +@item EINVAL +The value of the @var{pgid} is not valid. + +@item ENOSYS +The system doesn't support job control. + +@item EPERM +The process indicated by the @var{pid} argument is a session leader, +or is not in the same session as the calling process, or the value of +the @var{pgid} argument doesn't match a process group ID in the same +session as the calling process. + +@item ESRCH +The process indicated by the @var{pid} argument is not the calling +process or a child of the calling process. +@end table +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun int setpgrp (pid_t @var{pid}, pid_t @var{pgid}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall or setpgid wrapper. +This is the BSD Unix name for @code{setpgid}. Both functions do exactly +the same thing. +@end deftypefun + + +@node Terminal Access Functions, , Process Group Functions, Functions for Job Control +@subsection Functions for Controlling Terminal Access + +These are the functions for reading or setting the foreground +process group of a terminal. You should include the header files +@file{sys/types.h} and @file{unistd.h} in your application to use +these functions. +@pindex unistd.h +@pindex sys/types.h + +Although these functions take a file descriptor argument to specify +the terminal device, the foreground job is associated with the terminal +file itself and not a particular open file descriptor. + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t tcgetpgrp (int @var{filedes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Stub, or ioctl on BSD and GNU/Linux. +This function returns the process group ID of the foreground process +group associated with the terminal open on descriptor @var{filedes}. 
+ +If there is no foreground process group, the return value is a number +greater than @code{1} that does not match the process group ID of any +existing process group. This can happen if all of the processes in the +job that was formerly the foreground job have terminated, and no other +job has yet been moved into the foreground. + +In case of an error, a value of @code{-1} is returned. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item ENOSYS +The system doesn't support job control. + +@item ENOTTY +The terminal file associated with the @var{filedes} argument isn't the +controlling terminal of the calling process. +@end table +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int tcsetpgrp (int @var{filedes}, pid_t @var{pgid}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Stub, or ioctl on BSD and GNU/Linux. +This function is used to set a terminal's foreground process group ID. +The argument @var{filedes} is a descriptor which specifies the terminal; +@var{pgid} specifies the process group. The calling process must be a +member of the same session as @var{pgid} and must have the same +controlling terminal. + +For terminal access purposes, this function is treated as output. If it +is called from a background process on its controlling terminal, +normally all processes in the process group are sent a @code{SIGTTOU} +signal. The exception is if the calling process itself is ignoring or +blocking @code{SIGTTOU} signals, in which case the operation is +performed and no signal is sent. + +If successful, @code{tcsetpgrp} returns @code{0}. A return value of +@code{-1} indicates an error. The following @code{errno} error +conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINVAL +The @var{pgid} argument is not valid. 
+ +@item ENOSYS +The system doesn't support job control. + +@item ENOTTY +The @var{filedes} isn't the controlling terminal of the calling process. + +@item EPERM +The @var{pgid} isn't a process group in the same session as the calling +process. +@end table +@end deftypefun + +@comment termios.h +@comment Unix98 +@deftypefun pid_t tcgetsid (int @var{fildes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Ioctl call, if available, or tcgetpgrp followed by getsid. +This function is used to obtain the process group ID of the session +for which the terminal specified by @var{fildes} is the controlling terminal. +If the call is successful the group ID is returned. Otherwise the +return value is @code{(pid_t) -1} and the global variable @code{errno} +is set to one of the following values: +@table @code +@item EBADF +The @var{fildes} argument is not a valid file descriptor. + +@item ENOTTY +The calling process does not have a controlling terminal, or the file +is not the controlling terminal. +@end table +@end deftypefun diff --git a/REORG.TODO/manual/lang.texi b/REORG.TODO/manual/lang.texi new file mode 100644 index 0000000000..a151c9b690 --- /dev/null +++ b/REORG.TODO/manual/lang.texi @@ -0,0 +1,1321 @@ +@c This node must have no pointers. +@node Language Features +@c @node Language Features, Library Summary, , Top +@c %MENU% C language features provided by the library +@appendix C Language Facilities in the Library + +Some of the facilities implemented by the C library really should be +thought of as parts of the C language itself. These facilities ought to +be documented in the C Language Manual, not in the library manual; but +since we don't have the language manual yet, and documentation for these +features has been written, we are publishing it here. + +@menu +* Consistency Checking:: Using @code{assert} to abort if + something ``impossible'' happens. +* Variadic Functions:: Defining functions with varying numbers + of args.
+* Null Pointer Constant:: The macro @code{NULL}. +* Important Data Types:: Data types for object sizes. +* Data Type Measurements:: Parameters of data type representations. +@end menu + +@node Consistency Checking +@section Explicitly Checking Internal Consistency +@cindex consistency checking +@cindex impossible events +@cindex assertions + +When you're writing a program, it's often a good idea to put in checks +at strategic places for ``impossible'' errors or violations of basic +assumptions. These kinds of checks are helpful in debugging problems +with the interfaces between different parts of the program, for example. + +@pindex assert.h +The @code{assert} macro, defined in the header file @file{assert.h}, +provides a convenient way to abort the program while printing a message +about where in the program the error was detected. + +@vindex NDEBUG +Once you think your program is debugged, you can disable the error +checks performed by the @code{assert} macro by recompiling with the +macro @code{NDEBUG} defined. This means you don't actually have to +change the program source code to disable these checks. + +But disabling these consistency checks is undesirable unless they make +the program significantly slower. All else being equal, more error +checking is good no matter who is running the program. A wise user +would rather have a program crash, visibly, than have it return nonsense +without indicating anything might be wrong. + +@comment assert.h +@comment ISO +@deftypefn Macro void assert (int @var{expression}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asucorrupt{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c assert_fail_base calls asprintf, and fflushes stderr. +Verify the programmer's belief that @var{expression} is nonzero at +this point in the program. + +If @code{NDEBUG} is not defined, @code{assert} tests the value of +@var{expression}. 
If it is false (zero), @code{assert} aborts the +program (@pxref{Aborting a Program}) after printing a message of the +form: + +@smallexample +@file{@var{file}}:@var{linenum}: @var{function}: Assertion `@var{expression}' failed. +@end smallexample + +@noindent +on the standard error stream @code{stderr} (@pxref{Standard Streams}). +The filename and line number are taken from the C preprocessor macros +@code{__FILE__} and @code{__LINE__} and specify where the call to +@code{assert} was made. When using the GNU C compiler, the name of +the function which calls @code{assert} is taken from the built-in +variable @code{__PRETTY_FUNCTION__}; with older compilers, the function +name and following colon are omitted. + +If the preprocessor macro @code{NDEBUG} is defined before +@file{assert.h} is included, the @code{assert} macro is defined to do +absolutely nothing. + +@strong{Warning:} Even the argument expression @var{expression} is not +evaluated if @code{NDEBUG} is in effect. So never use @code{assert} +with arguments that involve side effects. For example, @code{assert +(++i > 0);} is a bad idea, because @code{i} will not be incremented if +@code{NDEBUG} is defined. +@end deftypefn + +Sometimes the ``impossible'' condition you want to check for is an error +return from an operating system function. Then it is useful to display +not only where the program crashes, but also what error was returned. +The @code{assert_perror} macro makes this easy. + +@comment assert.h +@comment GNU +@deftypefn Macro void assert_perror (int @var{errnum}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asucorrupt{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c assert_fail_base calls asprintf, and fflushes stderr. +Similar to @code{assert}, but verifies that @var{errnum} is zero. + +If @code{NDEBUG} is not defined, @code{assert_perror} tests the value of +@var{errnum}. 
If it is nonzero, @code{assert_perror} aborts the program +after printing a message of the form: + +@smallexample +@file{@var{file}}:@var{linenum}: @var{function}: @var{error text} +@end smallexample + +@noindent +on the standard error stream. The file name, line number, and function +name are as for @code{assert}. The error text is the result of +@w{@code{strerror (@var{errnum})}}. @xref{Error Messages}. + +Like @code{assert}, if @code{NDEBUG} is defined before @file{assert.h} +is included, the @code{assert_perror} macro does absolutely nothing. It +does not evaluate the argument, so @var{errnum} should not have any side +effects. It is best for @var{errnum} to be just a simple variable +reference; often it will be @code{errno}. + +This macro is a GNU extension. +@end deftypefn + +@strong{Usage note:} The @code{assert} facility is designed for +detecting @emph{internal inconsistency}; it is not suitable for +reporting invalid input or improper usage by the @emph{user} of the +program. + +The information in the diagnostic messages printed by the @code{assert} +and @code{assert_perror} macros is intended to help you, the programmer, +track down the cause of a bug, but is not really useful for telling a user +of your program why his or her input was invalid or why a command could not +be carried out. What's more, your program should not abort when given +invalid input, as @code{assert} would do---it should exit with nonzero +status (@pxref{Exit Status}) after printing its error messages, or perhaps +read another command or move on to the next input file. + +@xref{Error Messages}, for information on printing error messages for +problems that @emph{do not} represent bugs in the program. + + +@node Variadic Functions +@section Variadic Functions +@cindex variable number of arguments +@cindex variadic functions +@cindex optional arguments + +@w{ISO C} defines a syntax for declaring a function to take a variable +number or type of arguments.
(Such functions are referred to as +@dfn{varargs functions} or @dfn{variadic functions}.) However, the +language itself provides no mechanism for such functions to access their +non-required arguments; instead, you use the variable arguments macros +defined in @file{stdarg.h}. + +This section describes how to declare variadic functions, how to write +them, and how to call them properly. + +@strong{Compatibility Note:} Many older C dialects provide a similar, +but incompatible, mechanism for defining functions with variable numbers +of arguments, using @file{varargs.h}. + +@menu +* Why Variadic:: Reasons for making functions take + variable arguments. +* How Variadic:: How to define and call variadic functions. +* Variadic Example:: A complete example. +@end menu + +@node Why Variadic +@subsection Why Variadic Functions are Used + +Ordinary C functions take a fixed number of arguments. When you define +a function, you specify the data type for each argument. Every call to +the function should supply the expected number of arguments, with types +that can be converted to the specified ones. Thus, if the function +@samp{foo} is declared with @code{int foo (int, char *);} then you must +call it with two arguments, a number (any kind will do) and a string +pointer. + +But some functions perform operations that can meaningfully accept an +unlimited number of arguments. + +In some cases a function can handle any number of values by operating on +all of them as a block. For example, consider a function that allocates +a one-dimensional array with @code{malloc} to hold a specified set of +values. This operation makes sense for any number of values, as long as +the length of the array corresponds to that number. Without facilities +for variable arguments, you would have to define a separate function for +each possible array size. + +The library function @code{printf} (@pxref{Formatted Output}) is an +example of another class of function where variable arguments are +useful. 
This function prints its arguments (which can vary in type as +well as number) under the control of a format template string. + +These are good reasons to define a @dfn{variadic} function which can +handle as many arguments as the caller chooses to pass. + +Some functions such as @code{open} take a fixed set of arguments, but +occasionally ignore the last few. Strict adherence to @w{ISO C} requires +these functions to be defined as variadic; in practice, however, the GNU +C compiler and most other C compilers let you define such a function to +take a fixed set of arguments---the most it can ever use---and then only +@emph{declare} the function as variadic (or not declare its arguments +at all!). + +@node How Variadic +@subsection How Variadic Functions are Defined and Used + +Defining and using a variadic function involves three steps: + +@itemize @bullet +@item +@emph{Define} the function as variadic, using an ellipsis +(@samp{@dots{}}) in the argument list, and using special macros to +access the variable arguments. @xref{Receiving Arguments}. + +@item +@emph{Declare} the function as variadic, using a prototype with an +ellipsis (@samp{@dots{}}), in all the files which call it. +@xref{Variadic Prototypes}. + +@item +@emph{Call} the function by writing the fixed arguments followed by the +additional variable arguments. @xref{Calling Variadics}. +@end itemize + +@menu +* Variadic Prototypes:: How to make a prototype for a function + with variable arguments. +* Receiving Arguments:: Steps you must follow to access the + optional argument values. +* How Many Arguments:: How to decide whether there are more arguments. +* Calling Variadics:: Things you need to know about calling + variable arguments functions. +* Argument Macros:: Detailed specification of the macros + for accessing variable arguments. 
+@end menu + +@node Variadic Prototypes +@subsubsection Syntax for Variable Arguments +@cindex function prototypes (variadic) +@cindex prototypes for variadic functions +@cindex variadic function prototypes + +A function that accepts a variable number of arguments must be declared +with a prototype that says so. You write the fixed arguments as usual, +and then tack on @samp{@dots{}} to indicate the possibility of +additional arguments. The syntax of @w{ISO C} requires at least one fixed +argument before the @samp{@dots{}}. For example, + +@smallexample +int +func (const char *a, int b, @dots{}) +@{ + @dots{} +@} +@end smallexample + +@noindent +defines a function @code{func} which returns an @code{int} and takes two +required arguments, a @code{const char *} and an @code{int}. These are +followed by any number of anonymous arguments. + +@strong{Portability note:} For some C compilers, the last required +argument must not be declared @code{register} in the function +definition. Furthermore, this argument's type must be +@dfn{self-promoting}: that is, the default promotions must not change +its type. This rules out array and function types, as well as +@code{float}, @code{char} (whether signed or not) and @w{@code{short int}} +(whether signed or not). This is actually an @w{ISO C} requirement. + +@node Receiving Arguments +@subsubsection Receiving the Argument Values +@cindex variadic function argument access +@cindex arguments (variadic functions) + +Ordinary fixed arguments have individual names, and you can use these +names to access their values. But optional arguments have no +names---nothing but @samp{@dots{}}. How can you access them? + +@pindex stdarg.h +The only way to access them is sequentially, in the order they were +written, and you must use special macros from @file{stdarg.h} in the +following three step process: + +@enumerate +@item +You initialize an argument pointer variable of type @code{va_list} using +@code{va_start}. 
The argument pointer when initialized points to the +first optional argument. + +@item +You access the optional arguments by successive calls to @code{va_arg}. +The first call to @code{va_arg} gives you the first optional argument, +the next call gives you the second, and so on. + +You can stop at any time if you wish to ignore any remaining optional +arguments. It is perfectly all right for a function to access fewer +arguments than were supplied in the call, but you will get garbage +values if you try to access too many arguments. + +@item +You indicate that you are finished with the argument pointer variable by +calling @code{va_end}. + +(In practice, with most C compilers, calling @code{va_end} does nothing. +This is always true in the GNU C compiler. But you might as well call +@code{va_end} just in case your program is someday compiled with a peculiar +compiler.) +@end enumerate + +@xref{Argument Macros}, for the full definitions of @code{va_start}, +@code{va_arg} and @code{va_end}. + +Steps 1 and 3 must be performed in the function that accepts the +optional arguments. However, you can pass the @code{va_list} variable +as an argument to another function and perform all or part of step 2 +there. + +You can perform the entire sequence of three steps multiple times +within a single function invocation. If you want to ignore the optional +arguments, you can do these steps zero times. + +You can have more than one argument pointer variable if you like. You +can initialize each variable with @code{va_start} when you wish, and +then you can fetch arguments with each argument pointer as you wish. +Each argument pointer variable will sequence through the same set of +argument values, but at its own pace. + +@strong{Portability note:} With some compilers, once you pass an +argument pointer value to a subroutine, you must not keep using the same +argument pointer value after that subroutine returns. For full +portability, you should just pass it to @code{va_end}. 
This is actually +an @w{ISO C} requirement, but most ANSI C compilers work happily +regardless. + +@node How Many Arguments +@subsubsection How Many Arguments Were Supplied +@cindex number of arguments passed +@cindex how many arguments +@cindex arguments, how many + +There is no general way for a function to determine the number and type +of the optional arguments it was called with. So whoever designs the +function typically designs a convention for the caller to specify the number +and type of arguments. It is up to you to define an appropriate calling +convention for each variadic function, and write all calls accordingly. + +One kind of calling convention is to pass the number of optional +arguments as one of the fixed arguments. This convention works provided +all of the optional arguments are of the same type. + +A similar alternative is to have one of the required arguments be a bit +mask, with a bit for each possible purpose for which an optional +argument might be supplied. You would test the bits in a predefined +sequence; if the bit is set, fetch the value of the next argument, +otherwise use a default value. + +A required argument can be used as a pattern to specify both the number +and types of the optional arguments. The format string argument to +@code{printf} is one example of this (@pxref{Formatted Output Functions}). + +Another possibility is to pass an ``end marker'' value as the last +optional argument. For example, for a function that manipulates an +arbitrary number of pointer arguments, a null pointer might indicate the +end of the argument list. (This assumes that a null pointer isn't +otherwise meaningful to the function.) The @code{execl} function works +in just this way; see @ref{Executing a File}. 
+ + +@node Calling Variadics +@subsubsection Calling Variadic Functions +@cindex variadic functions, calling +@cindex calling variadic functions +@cindex declaring variadic functions + +You don't have to do anything special to call a variadic function. +Just put the arguments (required arguments, followed by optional ones) +inside parentheses, separated by commas, as usual. But you must declare +the function with a prototype and know how the argument values are converted. + +In principle, functions that are @emph{defined} to be variadic must also +be @emph{declared} to be variadic using a function prototype whenever +you call them. (@xref{Variadic Prototypes}, for how.) This is because +some C compilers use a different calling convention to pass the same set +of argument values to a function depending on whether that function +takes variable arguments or fixed arguments. + +In practice, the GNU C compiler always passes a given set of argument +types in the same way regardless of whether they are optional or +required. So, as long as the argument types are self-promoting, you can +safely omit declaring them. Usually it is a good idea to declare the +argument types for variadic functions, and indeed for all functions. +But there are a few functions which it is extremely convenient not to +have to declare as variadic---for example, @code{open} and +@code{printf}. + +@cindex default argument promotions +@cindex argument promotion +Since the prototype doesn't specify types for optional arguments, in a +call to a variadic function the @dfn{default argument promotions} are +performed on the optional argument values. This means the objects of +type @code{char} or @w{@code{short int}} (whether signed or not) are +promoted to either @code{int} or @w{@code{unsigned int}}, as +appropriate; and that objects of type @code{float} are promoted to type +@code{double}. 
So, if the caller passes a @code{char} as an optional +argument, it is promoted to an @code{int}, and the function can access +it with @code{va_arg (@var{ap}, int)}. + +Conversion of the required arguments is controlled by the function +prototype in the usual way: the argument expression is converted to the +declared argument type as if it were being assigned to a variable of +that type. + +@node Argument Macros +@subsubsection Argument Access Macros + +Here are descriptions of the macros used to retrieve variable arguments. +These macros are defined in the header file @file{stdarg.h}. +@pindex stdarg.h + +@comment stdarg.h +@comment ISO +@deftp {Data Type} va_list +The type @code{va_list} is used for argument pointer variables. +@end deftp + +@comment stdarg.h +@comment ISO +@deftypefn {Macro} void va_start (va_list @var{ap}, @var{last-required}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is no longer provided by glibc, but rather by the compiler. +This macro initializes the argument pointer variable @var{ap} to point +to the first of the optional arguments of the current function; +@var{last-required} must be the last required argument to the function. +@end deftypefn + +@comment stdarg.h +@comment ISO +@deftypefn {Macro} @var{type} va_arg (va_list @var{ap}, @var{type}) +@safety{@prelim{}@mtsafe{@mtsrace{:ap}}@assafe{}@acunsafe{@acucorrupt{}}} +@c This is no longer provided by glibc, but rather by the compiler. +@c Unlike the other va_ macros, that either start/end the lifetime of +@c the va_list object or don't modify it, this one modifies ap, and it +@c may leave it in a partially updated state. +The @code{va_arg} macro returns the value of the next optional argument, +and modifies the value of @var{ap} to point to the subsequent argument. +Thus, successive uses of @code{va_arg} return successive optional +arguments. + +The type of the value returned by @code{va_arg} is @var{type} as +specified in the call. 
@var{type} must be a self-promoting type (not +@code{char} or @code{short int} or @code{float}) that matches the type +of the actual argument. +@end deftypefn + +@comment stdarg.h +@comment ISO +@deftypefn {Macro} void va_end (va_list @var{ap}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is no longer provided by glibc, but rather by the compiler. +This ends the use of @var{ap}. After a @code{va_end} call, further +@code{va_arg} calls with the same @var{ap} may not work. You should invoke +@code{va_end} before returning from the function in which @code{va_start} +was invoked with the same @var{ap} argument. + +In @theglibc{}, @code{va_end} does nothing, and you need not ever +use it except for reasons of portability. +@refill +@end deftypefn + +Sometimes it is necessary to parse the list of parameters more than once +or one wants to remember a certain position in the parameter list. To +do this, one will have to make a copy of the current value of the +argument. But @code{va_list} is an opaque type and one cannot necessarily +assign the value of one variable of type @code{va_list} to another variable +of the same type. + +@comment stdarg.h +@comment ISO +@deftypefn {Macro} void va_copy (va_list @var{dest}, va_list @var{src}) +@deftypefnx {Macro} void __va_copy (va_list @var{dest}, va_list @var{src}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is no longer provided by glibc, but rather by the compiler. +The @code{va_copy} macro allows copying of objects of type +@code{va_list} even if this is not an integral type. The argument pointer +in @var{dest} is initialized to point to the same argument as the +pointer in @var{src}. + +This macro was added in ISO C99. When building for strict conformance +to ISO C90 (@samp{gcc -ansi}), it is not available. The macro +@code{__va_copy} is available as a GNU extension in any standards +mode; before GCC 3.0, it was the only macro for this functionality. 
+@end deftypefn + +If you want to use @code{va_copy} and be portable to pre-C99 systems, +you should always be prepared for the +possibility that this macro will not be available. On architectures where a +simple assignment is invalid, hopefully @code{va_copy} @emph{will} be available, +so one should always write something like this if concerned about +pre-C99 portability: + +@smallexample +@{ + va_list ap, save; + @dots{} +#ifdef va_copy + va_copy (save, ap); +#else + save = ap; +#endif + @dots{} +@} +@end smallexample + + +@node Variadic Example +@subsection Example of a Variadic Function + +Here is a complete sample function that accepts a variable number of +arguments. The first argument to the function is the count of remaining +arguments, which are added up and the result returned. While trivial, +this function is sufficient to illustrate how to use the variable +arguments facility. + +@comment Yes, this example has been tested. +@smallexample +@include add.c.texi +@end smallexample + +@node Null Pointer Constant +@section Null Pointer Constant +@cindex null pointer constant + +The null pointer constant is guaranteed not to point to any real object. +You can assign it to any pointer variable since it has type @code{void +*}. The preferred way to write a null pointer constant is with +@code{NULL}. + +@comment stddef.h +@comment ISO +@deftypevr Macro {void *} NULL +This is a null pointer constant. +@end deftypevr + +You can also use @code{0} or @code{(void *)0} as a null pointer +constant, but using @code{NULL} is cleaner because it makes the purpose +of the constant more evident. + +If you use the null pointer constant as a function argument, then for +complete portability you should make sure that the function has a +prototype declaration. Otherwise, if the target machine has two +different pointer representations, the compiler won't know which +representation to use for that argument. 
You can avoid the problem by +explicitly casting the constant to the proper pointer type, but we +recommend instead adding a prototype for the function you are calling. + +@node Important Data Types +@section Important Data Types + +The result of subtracting two pointers in C is always an integer, but the +precise data type varies from C compiler to C compiler. Likewise, the +data type of the result of @code{sizeof} also varies between compilers. +ISO C defines standard aliases for these two types, so you can refer to +them in a portable fashion. They are defined in the header file +@file{stddef.h}. +@pindex stddef.h + +@comment stddef.h +@comment ISO +@deftp {Data Type} ptrdiff_t +This is the signed integer type of the result of subtracting two +pointers. For example, with the declaration @code{char *p1, *p2;}, the +expression @code{p2 - p1} is of type @code{ptrdiff_t}. This will +probably be one of the standard signed integer types (@w{@code{short +int}}, @code{int} or @w{@code{long int}}), but might be a nonstandard +type that exists only for this purpose. +@end deftp + +@comment stddef.h +@comment ISO +@deftp {Data Type} size_t +This is an unsigned integer type used to represent the sizes of objects. +The result of the @code{sizeof} operator is of this type, and functions +such as @code{malloc} (@pxref{Unconstrained Allocation}) and +@code{memcpy} (@pxref{Copying Strings and Arrays}) accept arguments of +this type to specify object sizes. On systems using @theglibc{}, this +will be @w{@code{unsigned int}} or @w{@code{unsigned long int}}. + +@strong{Usage Note:} @code{size_t} is the preferred way to declare any +arguments or variables that hold the size of an object. +@end deftp + +@strong{Compatibility Note:} Implementations of C before the advent of +@w{ISO C} generally used @code{unsigned int} for representing object sizes +and @code{int} for pointer subtraction results. They did not +necessarily define either @code{size_t} or @code{ptrdiff_t}. 
Unix +systems did define @code{size_t}, in @file{sys/types.h}, but the +definition was usually a signed type. + +@node Data Type Measurements +@section Data Type Measurements + +Most of the time, if you choose the proper C data type for each object +in your program, you need not be concerned with just how it is +represented or how many bits it uses. When you do need such +information, the C language itself does not provide a way to get it. +The header files @file{limits.h} and @file{float.h} contain macros +which give you this information in full detail. + +@menu +* Width of Type:: How many bits does an integer type hold? +* Range of Type:: What are the largest and smallest values + that an integer type can hold? +* Floating Type Macros:: Parameters that measure the floating point types. +* Structure Measurement:: Getting measurements on structure types. +@end menu + +@node Width of Type +@subsection Computing the Width of an Integer Data Type +@cindex integer type width +@cindex width of integer type +@cindex type measurements, integer + +The most common reason that a program needs to know how many bits are in +an integer type is for using an array of @code{long int} as a bit vector. +You can access the bit at index @var{n} with + +@smallexample +vector[@var{n} / LONGBITS] & (1UL << (@var{n} % LONGBITS)) +@end smallexample + +@noindent +provided you define @code{LONGBITS} as the number of bits in a +@code{long int}. + +@pindex limits.h +There is no operator in the C language that can give you the number of +bits in an integer data type. But you can compute it from the macro +@code{CHAR_BIT}, defined in the header file @file{limits.h}. + +@table @code +@comment limits.h +@comment ISO +@item CHAR_BIT +This is the number of bits in a @code{char}---eight, on most systems. +The value has type @code{int}. 
+ +You can compute the number of bits in any data type @var{type} like +this: + +@smallexample +sizeof (@var{type}) * CHAR_BIT +@end smallexample +@end table + +That expression includes padding bits as well as value and sign bits. +On all systems supported by @theglibc{}, standard integer types other +than @code{_Bool} do not have any padding bits. TS 18661-1:2014 +defines additional macros for the width of integer types (the number +of value and sign bits); these macros can also be used in @code{#if} +preprocessor directives, whereas @code{sizeof} cannot. The following +macros are defined in @file{limits.h}. + +@vtable @code +@comment limits.h +@comment ISO +@item CHAR_WIDTH +@comment limits.h +@comment ISO +@itemx SCHAR_WIDTH +@comment limits.h +@comment ISO +@itemx UCHAR_WIDTH +@comment limits.h +@comment ISO +@itemx SHRT_WIDTH +@comment limits.h +@comment ISO +@itemx USHRT_WIDTH +@comment limits.h +@comment ISO +@itemx INT_WIDTH +@comment limits.h +@comment ISO +@itemx UINT_WIDTH +@comment limits.h +@comment ISO +@itemx LONG_WIDTH +@comment limits.h +@comment ISO +@itemx ULONG_WIDTH +@comment limits.h +@comment ISO +@itemx LLONG_WIDTH +@comment limits.h +@comment ISO +@itemx ULLONG_WIDTH + +These are the widths of the types @code{char}, @code{signed char}, +@code{unsigned char}, @code{short int}, @code{unsigned short int}, +@code{int}, @code{unsigned int}, @code{long int}, @code{unsigned long +int}, @code{long long int} and @code{unsigned long long int}, +respectively. +@end vtable + +Further such macros are defined in @file{stdint.h}. Apart from those +for types specified by width (@pxref{Integers}), the following are +defined. 
+ +@vtable @code +@comment stdint.h +@comment ISO +@item INTPTR_WIDTH +@comment stdint.h +@comment ISO +@itemx UINTPTR_WIDTH +@comment stdint.h +@comment ISO +@itemx PTRDIFF_WIDTH +@comment stdint.h +@comment ISO +@itemx SIG_ATOMIC_WIDTH +@comment stdint.h +@comment ISO +@itemx SIZE_WIDTH +@comment stdint.h +@comment ISO +@itemx WCHAR_WIDTH +@comment stdint.h +@comment ISO +@itemx WINT_WIDTH + +These are the widths of the types @code{intptr_t}, @code{uintptr_t}, +@code{ptrdiff_t}, @code{sig_atomic_t}, @code{size_t}, @code{wchar_t} +and @code{wint_t}, respectively. +@end vtable + +@node Range of Type +@subsection Range of an Integer Type +@cindex integer type range +@cindex range of integer type +@cindex limits, integer types + +Suppose you need to store an integer value which can range from zero to +one million. Which is the smallest type you can use? There is no +general rule; it depends on the C compiler and target machine. You can +use the @samp{MIN} and @samp{MAX} macros in @file{limits.h} to determine +which type will work. + +Each signed integer type has a pair of macros which give the smallest +and largest values that it can hold. Each unsigned integer type has one +such macro, for the maximum value; the minimum value is, of course, +zero. + +The values of these macros are all integer constant expressions. The +@samp{MAX} and @samp{MIN} macros for @code{char} and @w{@code{short +int}} types have values of type @code{int}. The @samp{MAX} and +@samp{MIN} macros for the other types have values of the same type +described by the macro---thus, @code{ULONG_MAX} has type +@w{@code{unsigned long int}}. + +@comment Extra blank lines make it look better. +@vtable @code +@comment limits.h +@comment ISO +@item SCHAR_MIN + +This is the minimum value that can be represented by a @w{@code{signed char}}. 
+ +@comment limits.h +@comment ISO +@item SCHAR_MAX +@comment limits.h +@comment ISO +@itemx UCHAR_MAX + +These are the maximum values that can be represented by a +@w{@code{signed char}} and @w{@code{unsigned char}}, respectively. + +@comment limits.h +@comment ISO +@item CHAR_MIN + +This is the minimum value that can be represented by a @code{char}. +It's equal to @code{SCHAR_MIN} if @code{char} is signed, or zero +otherwise. + +@comment limits.h +@comment ISO +@item CHAR_MAX + +This is the maximum value that can be represented by a @code{char}. +It's equal to @code{SCHAR_MAX} if @code{char} is signed, or +@code{UCHAR_MAX} otherwise. + +@comment limits.h +@comment ISO +@item SHRT_MIN + +This is the minimum value that can be represented by a @w{@code{signed +short int}}. On most machines that @theglibc{} runs on, +@code{short} integers are 16-bit quantities. + +@comment limits.h +@comment ISO +@item SHRT_MAX +@comment limits.h +@comment ISO +@itemx USHRT_MAX + +These are the maximum values that can be represented by a +@w{@code{signed short int}} and @w{@code{unsigned short int}}, +respectively. + +@comment limits.h +@comment ISO +@item INT_MIN + +This is the minimum value that can be represented by a @w{@code{signed +int}}. On most machines that @theglibc{} runs on, an @code{int} is +a 32-bit quantity. + +@comment limits.h +@comment ISO +@item INT_MAX +@comment limits.h +@comment ISO +@itemx UINT_MAX + +These are the maximum values that can be represented by, respectively, +the type @w{@code{signed int}} and the type @w{@code{unsigned int}}. + +@comment limits.h +@comment ISO +@item LONG_MIN + +This is the minimum value that can be represented by a @w{@code{signed +long int}}. On 32-bit machines that @theglibc{} runs on, @code{long} +integers are 32-bit quantities, the same size as @code{int}; on 64-bit +machines they are usually 64-bit quantities. 
+ +@comment limits.h +@comment ISO +@item LONG_MAX +@comment limits.h +@comment ISO +@itemx ULONG_MAX + +These are the maximum values that can be represented by a +@w{@code{signed long int}} and @code{unsigned long int}, respectively. + +@comment limits.h +@comment ISO +@item LLONG_MIN + +This is the minimum value that can be represented by a @w{@code{signed +long long int}}. On most machines that @theglibc{} runs on, +@w{@code{long long}} integers are 64-bit quantities. + +@comment limits.h +@comment ISO +@item LLONG_MAX +@comment limits.h +@comment ISO +@itemx ULLONG_MAX + +These are the maximum values that can be represented by a @code{signed +long long int} and @code{unsigned long long int}, respectively. + +@comment limits.h +@comment GNU +@item LONG_LONG_MIN +@comment limits.h +@comment GNU +@itemx LONG_LONG_MAX +@comment limits.h +@comment GNU +@itemx ULONG_LONG_MAX +These are obsolete names for @code{LLONG_MIN}, @code{LLONG_MAX}, and +@code{ULLONG_MAX}. They are only available if @code{_GNU_SOURCE} is +defined (@pxref{Feature Test Macros}). In GCC versions prior to 3.0, +these were the only names available. + +@comment limits.h +@comment GNU +@item WCHAR_MAX + +This is the maximum value that can be represented by a @code{wchar_t}. +@xref{Extended Char Intro}. +@end vtable + +The header file @file{limits.h} also defines some additional constants +that parameterize various operating system and file system limits. These +constants are described in @ref{System Configuration}. + +@node Floating Type Macros +@subsection Floating Type Macros +@cindex floating type measurements +@cindex measurements of floating types +@cindex type measurements, floating +@cindex limits, floating types + +The specific representation of floating point numbers varies from +machine to machine. 
Because floating point numbers are represented +internally as approximate quantities, algorithms for manipulating +floating point data often need to take account of the precise details of +the machine's floating point representation. + +Some of the functions in the C library itself need this information; for +example, the algorithms for printing and reading floating point numbers +(@pxref{I/O on Streams}) and for calculating trigonometric and +irrational functions (@pxref{Mathematics}) use it to avoid round-off +error and loss of accuracy. User programs that implement numerical +analysis techniques also often need this information in order to +minimize or compute error bounds. + +The header file @file{float.h} describes the format used by your +machine. + +@menu +* Floating Point Concepts:: Definitions of terminology. +* Floating Point Parameters:: Details of specific macros. +* IEEE Floating Point:: The measurements for one common + representation. +@end menu + +@node Floating Point Concepts +@subsubsection Floating Point Representation Concepts + +This section introduces the terminology for describing floating point +representations. + +You are probably already familiar with most of these concepts in terms +of scientific or exponential notation for floating point numbers. For +example, the number @code{123456.0} could be expressed in exponential +notation as @code{1.23456e+05}, a shorthand notation indicating that the +mantissa @code{1.23456} is multiplied by the base @code{10} raised to +power @code{5}. + +More formally, the internal representation of a floating point number +can be characterized in terms of the following parameters: + +@itemize @bullet +@item +@cindex sign (of floating point number) +The @dfn{sign} is either @code{-1} or @code{1}. + +@item +@cindex base (of floating point number) +@cindex radix (of floating point number) +The @dfn{base} or @dfn{radix} for exponentiation, an integer greater +than @code{1}. 
This is a constant for a particular representation. + +@item +@cindex exponent (of floating point number) +The @dfn{exponent} to which the base is raised. The upper and lower +bounds of the exponent value are constants for a particular +representation. + +@cindex bias (of floating point number exponent) +Sometimes, in the actual bits representing the floating point number, +the exponent is @dfn{biased} by adding a constant to it, to make it +always be represented as an unsigned quantity. This is only important +if you have some reason to pick apart the bit fields making up the +floating point number by hand, which is something for which @theglibc{} +provides no support. So this is ignored in the discussion that +follows. + +@item +@cindex mantissa (of floating point number) +@cindex significand (of floating point number) +The @dfn{mantissa} or @dfn{significand} is an unsigned integer which is a +part of each floating point number. + +@item +@cindex precision (of floating point number) +The @dfn{precision} of the mantissa. If the base of the representation +is @var{b}, then the precision is the number of base-@var{b} digits in +the mantissa. This is a constant for a particular representation. + +@cindex hidden bit (of floating point number mantissa) +Many floating point representations have an implicit @dfn{hidden bit} in +the mantissa. This is a bit which is present virtually in the mantissa, +but not stored in memory because its value is always 1 in a normalized +number. The precision figure (see above) includes any hidden bits. + +Again, @theglibc{} provides no facilities for dealing with such +low-level aspects of the representation. +@end itemize + +The mantissa of a floating point number represents an implicit fraction +whose denominator is the base raised to the power of the precision. Since +the largest representable mantissa is one less than this denominator, the +value of the fraction is always strictly less than @code{1}. 
The +mathematical value of a floating point number is then the product of this +fraction, the sign, and the base raised to the exponent. + +@cindex normalized floating point number +We say that the floating point number is @dfn{normalized} if the +fraction is at least @code{1/@var{b}}, where @var{b} is the base. In +other words, the mantissa would be too large to fit if it were +multiplied by the base. Non-normalized numbers are sometimes called +@dfn{denormal}; they contain less precision than the representation +normally can hold. + +If the number is not normalized, then you can subtract @code{1} from the +exponent while multiplying the mantissa by the base, and get another +floating point number with the same value. @dfn{Normalization} consists +of doing this repeatedly until the number is normalized. Two distinct +normalized floating point numbers cannot be equal in value. + +(There is an exception to this rule: if the mantissa is zero, it is +considered normalized. Another exception happens on certain machines +where the exponent is as small as the representation can hold. Then +it is impossible to subtract @code{1} from the exponent, so a number +may be normalized even if its fraction is less than @code{1/@var{b}}.) + +@node Floating Point Parameters +@subsubsection Floating Point Parameters + +@pindex float.h +These macro definitions can be accessed by including the header file +@file{float.h} in your program. + +Macro names starting with @samp{FLT_} refer to the @code{float} type, +while names beginning with @samp{DBL_} refer to the @code{double} type +and names beginning with @samp{LDBL_} refer to the @code{long double} +type. (If GCC does not support @code{long double} as a distinct data +type on a target machine then the values for the @samp{LDBL_} constants +are equal to the corresponding constants for the @code{double} type.) + +Of these macros, only @code{FLT_RADIX} is guaranteed to be a constant +expression. 
The other macros listed here cannot be reliably used in +places that require constant expressions, such as @samp{#if} +preprocessing directives or in the dimensions of static arrays. + +Although the @w{ISO C} standard specifies minimum and maximum values for +most of these parameters, the GNU C implementation uses whatever values +describe the floating point representation of the target machine. So in +principle GNU C actually satisfies the @w{ISO C} requirements only if the +target machine is suitable. In practice, all the machines currently +supported are suitable. + +@vtable @code +@comment float.h +@comment ISO +@item FLT_ROUNDS +This value characterizes the rounding mode for floating point addition. +The following values indicate standard rounding modes: + +@need 750 + +@table @code +@item -1 +The mode is indeterminable. +@item 0 +Rounding is towards zero. +@item 1 +Rounding is to the nearest number. +@item 2 +Rounding is towards positive infinity. +@item 3 +Rounding is towards negative infinity. +@end table + +@noindent +Any other value represents a machine-dependent nonstandard rounding +mode. + +On most machines, the value is @code{1}, in accordance with the IEEE +standard for floating point. + +Here is a table showing how certain values round for each possible value +of @code{FLT_ROUNDS}, if the other aspects of the representation match +the IEEE single-precision standard. + +@smallexample + 0 1 2 3 + 1.00000003 1.0 1.0 1.00000012 1.0 + 1.00000007 1.0 1.00000012 1.00000012 1.0 +-1.00000003 -1.0 -1.0 -1.0 -1.00000012 +-1.00000007 -1.0 -1.00000012 -1.0 -1.00000012 +@end smallexample + +@comment float.h +@comment ISO +@item FLT_RADIX +This is the value of the base, or radix, of the exponent representation. +This is guaranteed to be a constant expression, unlike the other macros +described in this section. The value is 2 on all machines we know of +except the IBM 360 and derivatives. 
+ +@comment float.h +@comment ISO +@item FLT_MANT_DIG +This is the number of base-@code{FLT_RADIX} digits in the floating point +mantissa for the @code{float} data type. The following expression +yields @code{1.0} (even though mathematically it should not) due to the +limited number of mantissa digits: + +@smallexample +float radix = FLT_RADIX; + +1.0f + 1.0f / radix / radix / @dots{} / radix +@end smallexample + +@noindent +where @code{radix} appears @code{FLT_MANT_DIG} times. + +@comment float.h +@comment ISO +@item DBL_MANT_DIG +@itemx LDBL_MANT_DIG +This is the number of base-@code{FLT_RADIX} digits in the floating point +mantissa for the data types @code{double} and @code{long double}, +respectively. + +@comment Extra blank lines make it look better. +@comment float.h +@comment ISO +@item FLT_DIG + +This is the number of decimal digits of precision for the @code{float} +data type. Technically, if @var{p} and @var{b} are the precision and +base (respectively) for the representation, then the decimal precision +@var{q} is the maximum number of decimal digits such that any floating +point number with @var{q} base 10 digits can be rounded to a floating +point number with @var{p} base @var{b} digits and back again, without +change to the @var{q} decimal digits. + +The value of this macro is supposed to be at least @code{6}, to satisfy +@w{ISO C}. + +@comment float.h +@comment ISO +@item DBL_DIG +@itemx LDBL_DIG + +These are similar to @code{FLT_DIG}, but for the data types +@code{double} and @code{long double}, respectively. The values of these +macros are supposed to be at least @code{10}. + +@comment float.h +@comment ISO +@item FLT_MIN_EXP +This is the smallest possible exponent value for type @code{float}. +More precisely, it is the minimum negative integer such that the value +@code{FLT_RADIX} raised to this power minus 1 can be represented as a +normalized floating point number of type @code{float}. 
+ +@comment float.h +@comment ISO +@item DBL_MIN_EXP +@itemx LDBL_MIN_EXP + +These are similar to @code{FLT_MIN_EXP}, but for the data types +@code{double} and @code{long double}, respectively. + +@comment float.h +@comment ISO +@item FLT_MIN_10_EXP +This is the minimum negative integer such that @code{10} raised to this +power can be represented as a normalized floating point number +of type @code{float}. This is supposed to be @code{-37} or even less. + +@comment float.h +@comment ISO +@item DBL_MIN_10_EXP +@itemx LDBL_MIN_10_EXP +These are similar to @code{FLT_MIN_10_EXP}, but for the data types +@code{double} and @code{long double}, respectively. + +@comment float.h +@comment ISO +@item FLT_MAX_EXP +This is the largest possible exponent value for type @code{float}. More +precisely, this is the maximum positive integer such that the value +@code{FLT_RADIX} raised to this power minus 1 can be represented as a +floating point number of type @code{float}. + +@comment float.h +@comment ISO +@item DBL_MAX_EXP +@itemx LDBL_MAX_EXP +These are similar to @code{FLT_MAX_EXP}, but for the data types +@code{double} and @code{long double}, respectively. + +@comment float.h +@comment ISO +@item FLT_MAX_10_EXP +This is the maximum positive integer such that @code{10} raised to this +power can be represented as a normalized floating point number +of type @code{float}. This is supposed to be at least @code{37}. + +@comment float.h +@comment ISO +@item DBL_MAX_10_EXP +@itemx LDBL_MAX_10_EXP +These are similar to @code{FLT_MAX_10_EXP}, but for the data types +@code{double} and @code{long double}, respectively. + +@comment float.h +@comment ISO +@item FLT_MAX + +The value of this macro is the maximum number representable in type +@code{float}. It is supposed to be at least @code{1E+37}. The value +has type @code{float}. + +The smallest representable number is @code{- FLT_MAX}. 
+ +@comment float.h +@comment ISO +@item DBL_MAX +@itemx LDBL_MAX + +These are similar to @code{FLT_MAX}, but for the data types +@code{double} and @code{long double}, respectively. The type of the +macro's value is the same as the type it describes. + +@comment float.h +@comment ISO +@item FLT_MIN + +The value of this macro is the minimum normalized positive floating +point number that is representable in type @code{float}. It is supposed +to be no more than @code{1E-37}. + +@comment float.h +@comment ISO +@item DBL_MIN +@itemx LDBL_MIN + +These are similar to @code{FLT_MIN}, but for the data types +@code{double} and @code{long double}, respectively. The type of the +macro's value is the same as the type it describes. + +@comment float.h +@comment ISO +@item FLT_EPSILON + +This is the difference between 1 and the smallest floating point +number of type @code{float} that is greater than 1. It's supposed to +be no greater than @code{1E-5}. + +@comment float.h +@comment ISO +@item DBL_EPSILON +@itemx LDBL_EPSILON + +These are similar to @code{FLT_EPSILON}, but for the data types +@code{double} and @code{long double}, respectively. The type of the +macro's value is the same as the type it describes. The values are not +supposed to be greater than @code{1E-9}. +@end vtable + +@node IEEE Floating Point +@subsubsection IEEE Floating Point +@cindex IEEE floating point representation +@cindex floating point, IEEE + +Here is an example showing how the floating type measurements come out +for the most common floating point representation, specified by the +@cite{IEEE Standard for Binary Floating Point Arithmetic (ANSI/IEEE Std +754-1985)}. Nearly all computers designed since the 1980s use this +format. + +The IEEE single-precision float representation uses a base of 2. There +is a sign bit, a mantissa with 23 bits plus one hidden bit (so the total +precision is 24 base-2 digits), and an 8-bit exponent that can represent +values in the range -125 to 128, inclusive. 
+ +So, for an implementation that uses this representation for the +@code{float} data type, appropriate values for the corresponding +parameters are: + +@smallexample +FLT_RADIX 2 +FLT_MANT_DIG 24 +FLT_DIG 6 +FLT_MIN_EXP -125 +FLT_MIN_10_EXP -37 +FLT_MAX_EXP 128 +FLT_MAX_10_EXP +38 +FLT_MIN 1.17549435E-38F +FLT_MAX 3.40282347E+38F +FLT_EPSILON 1.19209290E-07F +@end smallexample + +Here are the values for the @code{double} data type: + +@smallexample +DBL_MANT_DIG 53 +DBL_DIG 15 +DBL_MIN_EXP -1021 +DBL_MIN_10_EXP -307 +DBL_MAX_EXP 1024 +DBL_MAX_10_EXP 308 +DBL_MAX 1.7976931348623157E+308 +DBL_MIN 2.2250738585072014E-308 +DBL_EPSILON 2.2204460492503131E-016 +@end smallexample + +@node Structure Measurement +@subsection Structure Field Offset Measurement + +You can use @code{offsetof} to measure the location within a structure +type of a particular structure member. + +@comment stddef.h +@comment ISO +@deftypefn {Macro} size_t offsetof (@var{type}, @var{member}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is no longer provided by glibc, but rather by the compiler. +This expands to an integer constant expression that is the offset of the +structure member named @var{member} in the structure type @var{type}. +For example, @code{offsetof (struct s, elem)} is the offset, in bytes, +of the member @code{elem} in a @code{struct s}. + +This macro won't work if @var{member} is a bit field; you get an error +from the C compiler in that case. +@end deftypefn diff --git a/REORG.TODO/manual/lgpl-2.1.texi b/REORG.TODO/manual/lgpl-2.1.texi new file mode 100644 index 0000000000..ab03d6cc37 --- /dev/null +++ b/REORG.TODO/manual/lgpl-2.1.texi @@ -0,0 +1,549 @@ +@c The GNU Lesser General Public License. +@center Version 2.1, February 1999 + +@c This file is intended to be included within another document, +@c hence no sectioning command or @node. + +@display +Copyright @copyright{} 1991, 1999 Free Software Foundation, Inc. 
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts +as the successor of the GNU Library Public License, version 2, hence the +version number 2.1.] +@end display + +@subheading Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software---to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software---typically libraries---of the Free +Software Foundation and other authors who decide to use it. You can use +it too, but we suggest you first think carefully about whether this +license or the ordinary General Public License is the better strategy to +use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of it +in new free programs; and that you are informed that you can do these +things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. 
You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. 
The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the @dfn{Lesser} General Public License because it +does @emph{Less} to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +``work based on the library'' and a ``work that uses the library''. 
The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + +@subheading TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +@enumerate 0 +@item +This License Agreement applies to any software library or other program +which contains a notice placed by the copyright holder or other +authorized party saying it may be distributed under the terms of this +Lesser General Public License (also called ``this License''). Each +licensee is addressed as ``you''. + + A ``library'' means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The ``Library'', below, refers to any such software library or work +which has been distributed under these terms. A ``work based on the +Library'' means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term ``modification''.) + + ``Source code'' for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). 
Whether that is true depends on what the Library does +and what the program that uses the Library does. + +@item +You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + +@item +You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + +@enumerate a +@item +The modified work must itself be a software library. + +@item +You must cause the files modified to carry prominent notices +stating that you changed the files and the date of any change. + +@item +You must cause the whole of the work to be licensed at no +charge to all third parties under the terms of this License. + +@item +If a facility in the modified Library refers to a function or a +table of data to be supplied by an application program that uses +the facility, other than as an argument passed when the facility +is invoked, then you must make a good faith effort to ensure that, +in the event an application does not supply such function or +table, the facility still operates, and performs whatever part of +its purpose remains meaningful. + +(For example, a function in a library to compute square roots has +a purpose that is entirely well-defined independent of the +application. 
Therefore, Subsection 2d requires that any +application-supplied function or table used by this function must +be optional: if the application does not supply it, the square +root function must still compute square roots.) +@end enumerate + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + +@item +You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + +@item +You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + +@item +A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a ``work that uses the Library''. Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a ``work that uses the Library'' with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a ``work that uses the +library''. The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a ``work that uses the Library'' uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + +@item +As an exception to the Sections above, you may also combine or +link a ``work that uses the Library'' with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + +@enumerate a +@item +Accompany the work with the complete corresponding +machine-readable source code for the Library including whatever +changes were used in the work (which must be distributed under +Sections 1 and 2 above); and, if the work is an executable linked +with the Library, with the complete machine-readable ``work that +uses the Library'', as object code and/or source code, so that the +user can modify the Library and then relink to produce a modified +executable containing the modified Library. (It is understood +that the user who changes the contents of definitions files in the +Library will not necessarily be able to recompile the application +to use the modified definitions.) + +@item +Use a suitable shared library mechanism for linking with the Library. A +suitable mechanism is one that (1) uses at run time a copy of the +library already present on the user's computer system, rather than +copying library functions into the executable, and (2) will operate +properly with a modified version of the library, if the user installs +one, as long as the modified version is interface-compatible with the +version that the work was made with. + +@item +Accompany the work with a written offer, valid for at +least three years, to give the same user the materials +specified in Subsection 6a, above, for a charge no more +than the cost of performing this distribution. + +@item +If distribution of the work is made by offering access to copy +from a designated place, offer equivalent access to copy the above +specified materials from the same place. + +@item +Verify that the user has already received a copy of these +materials or that you have already sent this user a copy. +@end enumerate + + For an executable, the required form of the ``work that uses the +Library'' must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies the +executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + +@item +You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + +@enumerate a +@item +Accompany the combined library with a copy of the same work +based on the Library, uncombined with any other library +facilities. This must be distributed under the terms of the +Sections above. + +@item +Give prominent notice with the combined library of the fact +that part of it is a work based on the Library, and explaining +where to find the accompanying uncombined form of the same work. +@end enumerate + +@item +You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. 
+ +@item +You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + +@item +Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + +@item +If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. 
+ +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + +@item +If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + +@item +The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Library +specifies a version number of this License which applies to it and +``any later version'', you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + +@item +If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + +@center @b{NO WARRANTY} + +@item +BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY ``AS IS'' WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ +@item +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. +@end enumerate + +@subheading END OF TERMS AND CONDITIONS + +@page +@subheading How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +``copyright'' line and a pointer to where the full notice is found. + +@smallexample +@var{one line to give the library's name and an idea of what it does.} +Copyright (C) @var{year} @var{name of author} + +This library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. + +This library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +USA. +@end smallexample + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a ``copyright disclaimer'' for the library, if +necessary. Here is a sample; alter the names: + +@smallexample +Yoyodyne, Inc., hereby disclaims all copyright interest in the library +`Frob' (a library for tweaking knobs) written by James Random Hacker. + +@var{signature of Ty Coon}, 1 April 1990 +Ty Coon, President of Vice +@end smallexample + +That's all there is to it! diff --git a/REORG.TODO/manual/libc-texinfo.sh b/REORG.TODO/manual/libc-texinfo.sh new file mode 100644 index 0000000000..4d0a52213b --- /dev/null +++ b/REORG.TODO/manual/libc-texinfo.sh @@ -0,0 +1,102 @@ +#!/bin/sh + +OUTDIR=$1 +shift + +# Create libc.texinfo from the chapter files. + +trap "rm -f ${OUTDIR}*.$$; exit 1" 1 2 15 + +exec 3>${OUTDIR}incl.$$ 4>${OUTDIR}smenu.$$ 5>${OUTDIR}lmenu.$$ + +build_menu () { + while IFS=: read file node; do + echo "@include $file" >&3 + echo "* $node:: `sed -n 's/^@c %MENU% //p' $file`" >&4 + $AWK 'BEGIN { do_menu = 0 } + /^@node / { sub(/^@node /, ""); sub(/,.*$/, ""); node = $0 } + /^@menu/ { printf "\n%s\n\n", node; do_menu = 1; next } + /^@end menu/ { do_menu = 0 } + do_menu { print }' $file >&5 + done +} + +collect_nodes () { # print file/node entries for the given files, ordered by tsort.awk + grep -E '^(@c )?@node.*Top' "$@" /dev/null | cut -d, -f-2 | + sed 's/@c //; s/, /:/; s/:@node /:/; s/ /_/g; s/:/ /g' | + $AWK '{ file[$2] = $1; nnode[$2] = $3 } + END { for (x in file) + if (file[x] != "") + print file[x] ":" x, file[nnode[x]] ":" nnode[x] }' | + $AWK -f tsort.awk | sed 's/_/ /g' +} + +# Emit "@set ADD-ON" for each add-on contributing a manual chapter.
+for addon in $2; do + addon=`basename $addon .texi` + echo >&3 "@set $addon" +done + +collect_nodes $1 | build_menu + +if [ -n "$2" ]; then + + { echo; echo 'Add-ons'; echo; } >&4 + + grep -E '^(@c )?@node.*Top' `echo $2 /dev/null | tr ' ' '\n' | sort` | + cut -d, -f1 | sed 's/@c //;s/@node //' | build_menu + +fi + +{ echo; echo 'Appendices'; echo; } >&4 + +collect_nodes $3 | build_menu + +exec 3>&- 4>&- 5>&- + +mv -f ${OUTDIR}incl.$$ ${OUTDIR}chapters.texi + +{ + echo '@menu' + $AWK -F: ' + /^\*/ { + printf("%-32s", $1 "::"); + x = split($3, word, " "); + hpos = 34; + for (i = 1; i <= x; i++) { + hpos += length(word[i]) + 1; + if (hpos > 78) { + printf("\n%34s", ""); + hpos = 35 + length(word[i]); + } + printf(" %s", word[i]); + } + print "."; + } + + !/^\*/ { print; } + ' ${OUTDIR}smenu.$$ + cat <<EOF +* Free Manuals:: Free Software Needs Free Documentation. +* Copying:: The GNU Lesser General Public License says + how you can copy and share the GNU C Library. +* Documentation License:: This manual is under the GNU Free + Documentation License. + +Indices + +* Concept Index:: Index of concepts and names. +* Type Index:: Index of types and type qualifiers. +* Function Index:: Index of functions and function-like macros. +* Variable Index:: Index of variables and variable-like macros. +* File Index:: Index of programs and files. + + @detailmenu + --- The Detailed Node Listing --- +EOF + cat ${OUTDIR}lmenu.$$ + echo '@end detailmenu' + echo '@end menu'; } >${OUTDIR}top-menu.texi.$$ +mv -f ${OUTDIR}top-menu.texi.$$ ${OUTDIR}top-menu.texi + +rm -f ${OUTDIR}*.$$ diff --git a/REORG.TODO/manual/libc.texinfo b/REORG.TODO/manual/libc.texinfo new file mode 100644 index 0000000000..9c6a5d5152 --- /dev/null +++ b/REORG.TODO/manual/libc.texinfo @@ -0,0 +1,166 @@ +\input texinfo @c -*- Texinfo -*- +@comment %**start of header (This is for running Texinfo on a region.)
+@setfilename libc.info +@settitle The GNU C Library +@documentencoding UTF-8 +@c '@codequotebacktick on' and '@codequoteundirected on' require +@c Texinfo 5.0 or later, so use the older equivalent @set variables +@c supported in Texinfo 4.11 and later. +@set txicodequoteundirected +@set txicodequotebacktick + +@c setchapternewpage odd + +@include macros.texi + +@comment Tell install-info what to do. +@dircategory Software libraries +@direntry +* Libc: (libc). C library. +@end direntry +@include dir-add.texi + +@include pkgvers.texi + +@c This tells texinfo.tex to use the real section titles in xrefs in +@c place of the node name, when no section title is explicitly given. +@set xref-automatic-section-title +@c @smallbook +@comment %**end of header (This is for running Texinfo on a region.) + +@c Everything related to printed editions is disabled until we have +@c resolved how to keep them in sync with the master sources of the +@c manual. +@c sold 0.06/1.09, print run out 21may96 +@c @set EDITION 0.13 +@c @set ISBN 1-882114-55-8 + +@include version.texi + +@set FDL_VERSION 1.3 + +@copying +This file documents @theglibc{}. + +This is +@c Disabled (printed editions, see above). +@c Edition @value{EDITION} of +@cite{The GNU C Library Reference Manual}, for version +@ifset PKGVERSION_DEFAULT +@value{VERSION}. +@end ifset +@ifclear PKGVERSION_DEFAULT +@value{VERSION} @value{PKGVERSION}. +@end ifclear + +Copyright @copyright{} 1993--2017 Free Software Foundation, Inc. + +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version +@value{FDL_VERSION} or any later version published by the Free +Software Foundation; with the Invariant Sections being ``Free Software +Needs Free Documentation'' and ``GNU Lesser General Public License'', +the Front-Cover texts being ``A GNU Manual'', and with the Back-Cover +Texts as in (a) below. 
A copy of the license is included in the +section entitled "GNU Free Documentation License". + +(a) The FSF's Back-Cover Text is: ``You have the freedom to +copy and modify this GNU manual. Buying copies from the FSF +supports it in developing GNU and promoting software freedom.'' +@end copying + +@iftex +@shorttitlepage The GNU C Library Reference Manual +@end iftex +@titlepage +@center @titlefont{The GNU C Library} +@sp 1 +@center @titlefont{Reference Manual} +@sp 2 +@center Sandra Loosemore +@center with +@center Richard M. Stallman, Roland McGrath, Andrew Oram, and Ulrich Drepper +@sp 3 +@c Disabled (printed editions, see above). +@c @center Edition @value{EDITION} +@c @sp 1 +@center for version @value{VERSION} +@ifclear PKGVERSION_DEFAULT +@sp 1 +@center @value{PKGVERSION} +@end ifclear +@page +@vskip 0pt plus 1filll +@insertcopying + +@c Disabled (printed editions, see above). +@c @sp 2 +@c Published by the @uref{http://www.fsf.org/, Free Software Foundation} @* +@c ISBN @value{ISBN} @* + +@c Disabled (printed editions, see above). +@c @sp 2 +@c Cover art for the Free Software Foundation's printed edition +@c by Etienne Suvasa. + +@end titlepage + +@shortcontents +@contents + +@ifnottex +@node Top, Introduction, (dir), (dir) +@top Main Menu +This is +@c Disabled (printed editions, see above). +@c Edition @value{EDITION} of +@cite{The GNU C Library Reference Manual}, for Version @value{VERSION} +@ifclear PKGVERSION_DEFAULT +@value{PKGVERSION} +@end ifclear +of @theglibc{}. 
+@end ifnottex + +@include top-menu.texi +@include chapters.texi + +@node Free Manuals, Copying, Contributors, Top +@appendix Free Software Needs Free Documentation +@include freemanuals.texi + +@node Copying, Documentation License, Free Manuals, Top +@appendix GNU Lesser General Public License +@include lgpl-2.1.texi + +@node Documentation License, Concept Index, Copying, Top +@appendix GNU Free Documentation License +@cindex FDL, GNU Free Documentation License +@include fdl-@value{FDL_VERSION}.texi + +@node Concept Index, Type Index, Documentation License, Top +@unnumbered Concept Index + +@printindex cp + +@node Type Index, Function Index, Concept Index, Top +@unnumbered Type Index + +@printindex tp + +@node Function Index, Variable Index, Type Index, Top +@unnumbered Function and Macro Index + +@printindex fn + +@node Variable Index, File Index, Function Index, Top +@unnumbered Variable and Constant Macro Index + +@printindex vr + +@node File Index, , Variable Index, Top +@unnumbered Program and File Index + +@printindex pg + + +@bye diff --git a/REORG.TODO/manual/libcbook.texi b/REORG.TODO/manual/libcbook.texi new file mode 100644 index 0000000000..b248304ede --- /dev/null +++ b/REORG.TODO/manual/libcbook.texi @@ -0,0 +1,3 @@ +\input texinfo +@finalout +@include libc.texinfo diff --git a/REORG.TODO/manual/libdl.texi b/REORG.TODO/manual/libdl.texi new file mode 100644 index 0000000000..e3fe0452d9 --- /dev/null +++ b/REORG.TODO/manual/libdl.texi @@ -0,0 +1,10 @@ +@c FIXME these are undocumented: +@c dladdr +@c dladdr1 +@c dlclose +@c dlerror +@c dlinfo +@c dlmopen +@c dlopen +@c dlsym +@c dlvsym diff --git a/REORG.TODO/manual/libm-err-tab.pl b/REORG.TODO/manual/libm-err-tab.pl new file mode 100755 index 0000000000..75f5e5b7b7 --- /dev/null +++ b/REORG.TODO/manual/libm-err-tab.pl @@ -0,0 +1,205 @@ +#!/usr/bin/perl -w +# Copyright (C) 1999-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. 
+# Contributed by Andreas Jaeger <aj@suse.de>, 1999. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# Information about tests are stored in: %results +# $results{$test}{"type"} is the result type, e.g. normal or complex. +# In the following description $platform, $type and $float are: +# - $platform is the used platform +# - $type is either "normal", "real" (for the real part of a complex number) +# or "imag" (for the imaginary part # of a complex number). +# - $float is either of float, ifloat, double, idouble, ldouble, ildouble; +# It represents the underlying floating point type (float, double or long +# double) and if inline functions (the leading i stands for inline) +# are used. +# $results{$test}{$platform}{$type}{$float} is defined and has a delta +# or 'fail' as value. 
+ +use File::Find; + +use strict; + +use vars qw ($sources @platforms %pplatforms); +use vars qw (%results @all_floats %suffices %all_functions); + + +# all_floats is in output order and contains all recognised float types that +# we're going to output +@all_floats = ('float', 'double', 'ldouble'); +%suffices = + ( 'float' => 'f', + 'double' => '', + 'ldouble' => 'l' + ); + +# Pretty description of platform +%pplatforms = (); + +%all_functions = (); + +if ($#ARGV == 0) { + $sources = $ARGV[0]; +} else { + $sources = '/usr/src/cvs/libc'; +} + +find (\&find_files, $sources); + +@platforms = sort by_platforms @platforms; + +&print_all; + +sub find_files { + if ($_ eq 'libm-test-ulps') { + # print "Parsing $File::Find::name\n"; + push @platforms, $File::Find::dir; + my ($file, $name); + $file = "${File::Find::name}-name"; + open NAME, $file or die ("Can't open $file: $!"); + $name = <NAME>; + chomp $name; + close NAME; + $pplatforms{$File::Find::dir} = $name; + &parse_ulps ($File::Find::name, $File::Find::dir); + } +} + +# Parse ulps file +sub parse_ulps { + my ($file, $platform) = @_; + my ($test, $type, $float, $eps, $ignore_fn); + + # $type has the following values: + # "normal": No complex variable + # "real": Real part of complex result + # "imag": Imaginary part of complex result + open ULP, $file or die ("Can't open $file: $!"); + while (<ULP>) { + chomp; # unlike chop, leaves a final line without a newline intact + # ignore comments and empty lines + next if /^#/; + next if /^\s*$/; + if (/^Function: /) { + if (/Real part of/) { + s/Real part of //; + $type = 'real'; + } elsif (/Imaginary part of/) { + s/Imaginary part of //; + $type = 'imag'; + } else { + $type = 'normal'; + } + ($test) = ($_ =~ /^Function:\s*\"([a-zA-Z0-9_]+)\"/); + next; + } + if ($test =~ /_(downward|towardzero|upward|vlen)/) { + $ignore_fn = 1; + } else { + $ignore_fn = 0; + $all_functions{$test} = 1; + } + if (/^i?(float|double|ldouble):/) { + ($float, $eps) = split /\s*:\s*/,$_,2; + if ($ignore_fn) { + next; + } elsif ($eps eq 'fail') {
$results{$test}{$platform}{$type}{$float} = 'fail'; + } elsif ($eps eq "0") { + # ignore + next; + } elsif (!exists $results{$test}{$platform}{$type}{$float} + || $results{$test}{$platform}{$type}{$float} ne 'fail') { + $results{$test}{$platform}{$type}{$float} = $eps; + } + if ($type =~ /^(?:real|imag)$/) { + $results{$test}{'type'} = 'complex'; + } elsif ($type eq 'normal') { + $results{$test}{'type'} = 'normal'; + } + next; + } + print "Skipping unknown entry: `$_'\n"; + } + close ULP; +} + +sub get_value { + my ($fct, $platform, $type, $float) = @_; + + return (exists $results{$fct}{$platform}{$type}{$float} + ? $results{$fct}{$platform}{$type}{$float} : "0"); +} + +sub print_platforms { + my (@p) = @_; + my ($fct, $platform, $float, $first, $i, $platform_no, $platform_total); + + print '@multitable {nexttowardf} '; + foreach (@p) { + print ' {1000 + i 1000}'; + } + print "\n"; + + print '@item Function '; + foreach (@p) { + print ' @tab '; + print $pplatforms{$_}; + } + print "\n"; + + + foreach $fct (sort keys %all_functions) { + foreach $float (@all_floats) { + print "\@item $fct$suffices{$float} "; + foreach $platform (@p) { + print ' @tab '; + if (exists $results{$fct}{$platform}{'normal'}{$float} + || exists $results{$fct}{$platform}{'real'}{$float} + || exists $results{$fct}{$platform}{'imag'}{$float}) { + if ($results{$fct}{'type'} eq 'complex') { + print &get_value ($fct, $platform, 'real', $float), + ' + i ', &get_value ($fct, $platform, 'imag', $float); + } else { + print $results{$fct}{$platform}{'normal'}{$float}; + } + } else { + print '-'; + } + } + print "\n"; + } + } + + print "\@end multitable\n"; +} + +sub print_all { + my ($i, $max); + + my ($columns) = 5; + + # Print only 5 platforms at a time; <= so the last (or only) platform is not skipped. + for ($i=0; $i <= $#platforms; $i+=$columns) { + $max = $i+$columns-1 > $#platforms ? $#platforms : $i+$columns-1; + print_platforms (@platforms[$i ..
$max]); + } +} + +sub by_platforms { + return $pplatforms{$a} cmp $pplatforms{$b}; +} diff --git a/REORG.TODO/manual/llio.texi b/REORG.TODO/manual/llio.texi new file mode 100644 index 0000000000..8d18509d45 --- /dev/null +++ b/REORG.TODO/manual/llio.texi @@ -0,0 +1,4429 @@ +@node Low-Level I/O, File System Interface, I/O on Streams, Top +@c %MENU% Low-level, less portable I/O +@chapter Low-Level Input/Output + +This chapter describes functions for performing low-level input/output +operations on file descriptors. These functions include the primitives +for the higher-level I/O functions described in @ref{I/O on Streams}, as +well as functions for performing low-level control operations for which +there are no equivalents on streams. + +Stream-level I/O is more flexible and usually more convenient; +therefore, programmers generally use the descriptor-level functions only +when necessary. These are some of the usual reasons: + +@itemize @bullet +@item +For reading binary files in large chunks. + +@item +For reading an entire file into core before parsing it. + +@item +To perform operations other than data transfer, which can only be done +with a descriptor. (You can use @code{fileno} to get the descriptor +corresponding to a stream.) + +@item +To pass descriptors to a child process. (The child can create its own +stream to use a descriptor that it inherits, but cannot inherit a stream +directly.) +@end itemize + +@menu +* Opening and Closing Files:: How to open and close file + descriptors. +* I/O Primitives:: Reading and writing data. +* File Position Primitive:: Setting a descriptor's file + position. +* Descriptors and Streams:: Converting descriptor to stream + or vice-versa. +* Stream/Descriptor Precautions:: Precautions needed if you use both + descriptors and streams. +* Scatter-Gather:: Fast I/O to discontinuous buffers. +* Memory-mapped I/O:: Using files like memory. +* Waiting for I/O:: How to check for input or output + on multiple file descriptors. 
+* Synchronizing I/O:: Making sure all I/O actions completed. +* Asynchronous I/O:: Perform I/O in parallel. +* Control Operations:: Various other operations on file + descriptors. +* Duplicating Descriptors:: Fcntl commands for duplicating + file descriptors. +* Descriptor Flags:: Fcntl commands for manipulating + flags associated with file + descriptors. +* File Status Flags:: Fcntl commands for manipulating + flags associated with open files. +* File Locks:: Fcntl commands for implementing + file locking. +* Open File Description Locks:: Fcntl commands for implementing + open file description locking. +* Open File Description Locks Example:: An example of open file description lock + usage +* Interrupt Input:: Getting an asynchronous signal when + input arrives. +* IOCTLs:: Generic I/O Control operations. +@end menu + + +@node Opening and Closing Files +@section Opening and Closing Files + +@cindex opening a file descriptor +@cindex closing a file descriptor +This section describes the primitives for opening and closing files +using file descriptors. The @code{open} and @code{creat} functions are +declared in the header file @file{fcntl.h}, while @code{close} is +declared in @file{unistd.h}. +@pindex unistd.h +@pindex fcntl.h + +@comment fcntl.h +@comment POSIX.1 +@deftypefun int open (const char *@var{filename}, int @var{flags}[, mode_t @var{mode}]) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +The @code{open} function creates and returns a new file descriptor for +the file named by @var{filename}. Initially, the file position +indicator for the file is at the beginning of the file. The argument +@var{mode} (@pxref{Permission Bits}) is used only when a file is +created, but it doesn't hurt to supply the argument in any case. + +The @var{flags} argument controls how the file is to be opened. This is +a bit mask; you create the value by the bitwise OR of the appropriate +parameters (using the @samp{|} operator in C). 
+@xref{File Status Flags}, for the parameters available. + +The normal return value from @code{open} is a non-negative integer file +descriptor. In the case of an error, a value of @math{-1} is returned +instead. In addition to the usual file name errors (@pxref{File +Name Errors}), the following @code{errno} error conditions are defined +for this function: + +@table @code +@item EACCES +The file exists but is not readable/writable as requested by the @var{flags} +argument, or the file does not exist and the directory is unwritable so +it cannot be created. + +@item EEXIST +Both @code{O_CREAT} and @code{O_EXCL} are set, and the named file already +exists. + +@item EINTR +The @code{open} operation was interrupted by a signal. +@xref{Interrupted Primitives}. + +@item EISDIR +The @var{flags} argument specified write access, and the file is a directory. + +@item EMFILE +The process has too many files open. +The maximum number of file descriptors is controlled by the +@code{RLIMIT_NOFILE} resource limit; @pxref{Limits on Resources}. + +@item ENFILE +The entire system, or perhaps the file system which contains the +directory, cannot support any additional open files at the moment. +(This problem cannot happen on @gnuhurdsystems{}.) + +@item ENOENT +The named file does not exist, and @code{O_CREAT} is not specified. + +@item ENOSPC +The directory or file system that would contain the new file cannot be +extended, because there is no disk space left. + +@item ENXIO +@code{O_NONBLOCK} and @code{O_WRONLY} are both set in the @var{flags} +argument, the file named by @var{filename} is a FIFO (@pxref{Pipes and +FIFOs}), and no process has the file open for reading. + +@item EROFS +The file resides on a read-only file system and any of @w{@code{O_WRONLY}}, +@code{O_RDWR}, and @code{O_TRUNC} are set in the @var{flags} argument, +or @code{O_CREAT} is set and the file does not already exist. +@end table + +@c !!! 
umask + +If on a 32 bit machine the sources are translated with +@code{_FILE_OFFSET_BITS == 64} the function @code{open} returns a file +descriptor opened in the large file mode which enables the file handling +functions to use files up to @twoexp{63} bytes in size and offset from +@minus{}@twoexp{63} to @twoexp{63}. This happens transparently for the user +since all of the low-level file handling functions are equally replaced. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{open} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{open} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The @code{open} function is the underlying primitive for the @code{fopen} +and @code{freopen} functions, which create streams. +@end deftypefun + +@comment fcntl.h +@comment Unix98 +@deftypefun int open64 (const char *@var{filename}, int @var{flags}[, mode_t @var{mode}]) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +This function is similar to @code{open}. It returns a file descriptor +which can be used to access the file named by @var{filename}. The only +difference is that on 32 bit systems the file is opened in the +large file mode. I.e., file length and file offsets can exceed 31 bits. + +When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this +function is actually available under the name @code{open}. I.e., the +new, extended API using 64 bit file sizes and offsets transparently +replaces the old API. +@end deftypefun + +@comment fcntl.h +@comment POSIX.1 +@deftypefn {Obsolete function} int creat (const char *@var{filename}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +This function is obsolete.
The call: + +@smallexample +creat (@var{filename}, @var{mode}) +@end smallexample + +@noindent +is equivalent to: + +@smallexample +open (@var{filename}, O_WRONLY | O_CREAT | O_TRUNC, @var{mode}) +@end smallexample + +If on a 32 bit machine the sources are translated with +@code{_FILE_OFFSET_BITS == 64} the function @code{creat} returns a file +descriptor opened in the large file mode which enables the file handling +functions to use files up to @twoexp{63} bytes in size and offset from +@minus{}@twoexp{63} to @twoexp{63}. This happens transparently for the user +since all of the low-level file handling functions are equally replaced. +@end deftypefn + +@comment fcntl.h +@comment Unix98 +@deftypefn {Obsolete function} int creat64 (const char *@var{filename}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +This function is similar to @code{creat}. It returns a file descriptor +which can be used to access the file named by @var{filename}. The only +difference is that on 32 bit systems the file is opened in the +large file mode. I.e., file length and file offsets can exceed 31 bits. + +To use this file descriptor one must not use the normal operations but +instead the counterparts named @code{*64}, e.g., @code{lseek64}. + +When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this +function is actually available under the name @code{creat}. I.e., the +new, extended API using 64 bit file sizes and offsets transparently +replaces the old API. +@end deftypefn + +@comment unistd.h +@comment POSIX.1 +@deftypefun int close (int @var{filedes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +The function @code{close} closes the file descriptor @var{filedes}. +Closing a file has the following consequences: + +@itemize @bullet +@item +The file descriptor is deallocated. + +@item +Any record locks owned by the process on the file are unlocked.
+ +@item +When all file descriptors associated with a pipe or FIFO have been closed, +any unread data is discarded. +@end itemize + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{close} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{close} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The normal return value from @code{close} is @math{0}; a value of @math{-1} +is returned in case of failure. The following @code{errno} error +conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINTR +The @code{close} call was interrupted by a signal. +@xref{Interrupted Primitives}. +Here is an example of how to handle @code{EINTR} properly: + +@smallexample +TEMP_FAILURE_RETRY (close (desc)); +@end smallexample + +@item ENOSPC +@itemx EIO +@itemx EDQUOT +When the file is accessed by NFS, these errors from @code{write} can sometimes +not be detected until @code{close}. @xref{I/O Primitives}, for details +on their meaning. +@end table + +Please note that there is @emph{no} separate @code{close64} function. +This is not necessary since this function does not determine nor depend +on the mode of the file. The kernel which performs the @code{close} +operation knows which mode the descriptor is used for and can handle +this situation. +@end deftypefun + +To close a stream, call @code{fclose} (@pxref{Closing Streams}) instead +of trying to close its underlying file descriptor with @code{close}. +This flushes any buffered output and updates the stream object to +indicate that it is closed. 
+ +@node I/O Primitives +@section Input and Output Primitives + +This section describes the functions for performing primitive input and +output operations on file descriptors: @code{read}, @code{write}, and +@code{lseek}. These functions are declared in the header file +@file{unistd.h}. +@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftp {Data Type} ssize_t +This data type is used to represent the sizes of blocks that can be +read or written in a single operation. It is similar to @code{size_t}, +but must be a signed type. +@end deftp + +@cindex reading from a file descriptor +@comment unistd.h +@comment POSIX.1 +@deftypefun ssize_t read (int @var{filedes}, void *@var{buffer}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{read} function reads up to @var{size} bytes from the file +with descriptor @var{filedes}, storing the results in the @var{buffer}. +(This is not necessarily a character string, and no terminating null +character is added.) + +@cindex end-of-file, on a file descriptor +The return value is the number of bytes actually read. This might be +less than @var{size}; for example, if there aren't that many bytes left +in the file or if there aren't that many bytes immediately available. +The exact behavior depends on what kind of file it is. Note that +reading less than @var{size} bytes is not an error. + +A value of zero indicates end-of-file (except if the value of the +@var{size} argument is also zero). This is not considered an error. +If you keep calling @code{read} while at end-of-file, it will keep +returning zero and doing nothing else. + +If @code{read} returns at least one character, there is no way you can +tell whether end-of-file was reached. But if you did reach the end, the +next read will return zero. + +In case of an error, @code{read} returns @math{-1}. 
The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EAGAIN +Normally, when no input is immediately available, @code{read} waits for +some input. But if the @code{O_NONBLOCK} flag is set for the file +(@pxref{File Status Flags}), @code{read} returns immediately without +reading any data, and reports this error. + +@strong{Compatibility Note:} Most versions of BSD Unix use a different +error code for this: @code{EWOULDBLOCK}. In @theglibc{}, +@code{EWOULDBLOCK} is an alias for @code{EAGAIN}, so it doesn't matter +which name you use. + +On some systems, reading a large amount of data from a character special +file can also fail with @code{EAGAIN} if the kernel cannot find enough +physical memory to lock down the user's pages. This is limited to +devices that transfer with direct memory access into the user's memory, +which means it does not include terminals, since they always use +separate buffers inside the kernel. This problem never happens on +@gnuhurdsystems{}. + +Any condition that could result in @code{EAGAIN} can instead result in a +successful @code{read} which returns fewer bytes than requested. +Calling @code{read} again immediately would result in @code{EAGAIN}. + +@item EBADF +The @var{filedes} argument is not a valid file descriptor, +or is not open for reading. + +@item EINTR +@code{read} was interrupted by a signal while it was waiting for input. +@xref{Interrupted Primitives}. A signal will not necessarily cause +@code{read} to return @code{EINTR}; it may instead result in a +successful @code{read} which returns fewer bytes than requested. + +@item EIO +For many devices, and for disk files, this error code indicates +a hardware error. + +@code{EIO} also occurs when a background process tries to read from the +controlling terminal, and the normal action of stopping the process by +sending it a @code{SIGTTIN} signal isn't working. 
This might happen if +the signal is being blocked or ignored, or because the process group is +orphaned. @xref{Job Control}, for more information about job control, +and @ref{Signal Handling}, for information about signals. + +@item EINVAL +In some systems, when reading from a character or block device, position +and size offsets must be aligned to a particular block size. This error +indicates that the offsets were not properly aligned. +@end table + +Please note that there is no function named @code{read64}. This is not +necessary since this function does not directly modify or handle the +possibly wide file offset. Since the kernel handles this state +internally, the @code{read} function can be used for all cases. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{read} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{read} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The @code{read} function is the underlying primitive for all of the +functions that read from streams, such as @code{fgetc}. +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun ssize_t pread (int @var{filedes}, void *@var{buffer}, size_t @var{size}, off_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is usually a safe syscall. The sysdeps/posix fallback emulation +@c is not MT-Safe because it uses lseek, read and lseek back, but is it +@c used anywhere? +The @code{pread} function is similar to the @code{read} function. The +first three arguments are identical, and the return values and error +codes also correspond. + +The difference is the fourth argument and its handling. The data block +is not read from the current position of the file descriptor +@code{filedes}. 
Instead the data is read from the file starting at +position @var{offset}. The position of the file descriptor itself is +not affected by the operation. The value is the same as before the call. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{pread} function is in fact @code{pread64} and the type +@code{off_t} has 64 bits, which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value of @code{pread} describes the number of bytes read. +In the error case it returns @math{-1} like @code{read} does and the +error codes are also the same, with these additions: + +@table @code +@item EINVAL +The value given for @var{offset} is negative and therefore illegal. + +@item ESPIPE +The file descriptor @var{filedes} is associated with a pipe or a FIFO and +this device does not allow positioning of the file pointer. +@end table + +The function is an extension defined in the Unix Single Specification +version 2. +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun ssize_t pread64 (int @var{filedes}, void *@var{buffer}, size_t @var{size}, off64_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is usually a safe syscall. The sysdeps/posix fallback emulation +@c is not MT-Safe because it uses lseek64, read and lseek64 back, but is +@c it used anywhere? +This function is similar to the @code{pread} function. The difference +is that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t} which makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The +file descriptor @code{filedes} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode. 
+ +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{pread} and so transparently replaces the 32 bit interface. +@end deftypefun + +@cindex writing to a file descriptor +@comment unistd.h +@comment POSIX.1 +@deftypefun ssize_t write (int @var{filedes}, const void *@var{buffer}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Some say write is thread-unsafe on Linux without O_APPEND. In the VFS layer +@c the vfs_write() does no locking around the acquisition of a file offset and +@c therefore multiple threads / kernel tasks may race and get the same offset +@c resulting in data loss. +@c +@c See: +@c http://thread.gmane.org/gmane.linux.kernel/397980 +@c http://lwn.net/Articles/180387/ +@c +@c The counter argument is that POSIX only says that the write starts at the +@c file position and that the file position is updated *before* the function +@c returns. What that really means is that any expectation of atomic writes is +@c strictly an invention of the interpretation of the reader. Data loss could +@c happen if two threads start the write at the same time. Only writes that +@c come after the return of another write are guaranteed to follow the other +@c write. +@c +@c The other side of the coin is that POSIX goes on further to say in +@c "2.9.7 Thread Interactions with Regular File Operations" that threads +@c should never see interleaving sets of file operations, but it is insane +@c to do anything like that because it kills performance, so you don't get +@c those guarantees in Linux. +@c +@c So we mark it thread safe, it doesn't blow up, but you might lose +@c data, and we don't strictly meet the POSIX requirements. +@c +@c The fix for file offsets racing was merged in 3.14, the commits were: +@c 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4, and +@c d7a15f8d0777955986a2ab00ab181795cab14b01.
Therefore after Linux 3.14 you +@c should get mostly MT-safe writes. +The @code{write} function writes up to @var{size} bytes from +@var{buffer} to the file with descriptor @var{filedes}. The data in +@var{buffer} is not necessarily a character string and a null character is +output like any other character. + +The return value is the number of bytes actually written. This may be +@var{size}, but can always be smaller. Your program should always call +@code{write} in a loop, iterating until all the data is written. + +Once @code{write} returns, the data is enqueued to be written and can be +read back right away, but it is not necessarily written out to permanent +storage immediately. You can use @code{fsync} when you need to be sure +your data has been permanently stored before continuing. (It is more +efficient for the system to batch up consecutive writes and do them all +at once when convenient. Normally they will always be written to disk +within a minute or less.) Modern systems provide another function +@code{fdatasync} which guarantees integrity only for the file data and +is therefore faster. +@c !!! xref fsync, fdatasync +You can use the @code{O_FSYNC} open mode to make @code{write} always +store the data to disk before returning; @pxref{Operating Modes}. + +In the case of an error, @code{write} returns @math{-1}. The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EAGAIN +Normally, @code{write} blocks until the write operation is complete. +But if the @code{O_NONBLOCK} flag is set for the file (@pxref{Control +Operations}), it returns immediately without writing any data and +reports this error. An example of a situation that might cause the +process to block on output is writing to a terminal device that supports +flow control, where output has been suspended by receipt of a STOP +character. + +@strong{Compatibility Note:} Most versions of BSD Unix use a different +error code for this: @code{EWOULDBLOCK}. 
In @theglibc{}, +@code{EWOULDBLOCK} is an alias for @code{EAGAIN}, so it doesn't matter +which name you use. + +On some systems, writing a large amount of data to a character special +file can also fail with @code{EAGAIN} if the kernel cannot find enough +physical memory to lock down the user's pages. This is limited to +devices that transfer with direct memory access into the user's memory, +which means it does not include terminals, since they always use +separate buffers inside the kernel. This problem does not arise on +@gnuhurdsystems{}. + +@item EBADF +The @var{filedes} argument is not a valid file descriptor, +or is not open for writing. + +@item EFBIG +The size of the file would become larger than the implementation can support. + +@item EINTR +The @code{write} operation was interrupted by a signal while it was +blocked waiting for completion. A signal will not necessarily cause +@code{write} to return @code{EINTR}; it may instead result in a +successful @code{write} which writes fewer bytes than requested. +@xref{Interrupted Primitives}. + +@item EIO +For many devices, and for disk files, this error code indicates +a hardware error. + +@item ENOSPC +The device containing the file is full. + +@item EPIPE +This error is returned when you try to write to a pipe or FIFO that +isn't open for reading by any process. When this happens, a @code{SIGPIPE} +signal is also sent to the process; see @ref{Signal Handling}. + +@item EINVAL +In some systems, when writing to a character or block device, position +and size offsets must be aligned to a particular block size. This error +indicates that the offsets were not properly aligned. +@end table + +Unless you have arranged to prevent @code{EINTR} failures, you should +check @code{errno} after each failing call to @code{write}, and if the +error was @code{EINTR}, you should simply repeat the call. +@xref{Interrupted Primitives}.
The easy way to do this is with the +macro @code{TEMP_FAILURE_RETRY}, as follows: + +@smallexample +nbytes = TEMP_FAILURE_RETRY (write (desc, buffer, count)); +@end smallexample + +Please note that there is no function named @code{write64}. This is not +necessary since this function does not directly modify or handle the +possibly wide file offset. Since the kernel handles this state +internally the @code{write} function can be used for all cases. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{write} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{write} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The @code{write} function is the underlying primitive for all of the +functions that write to streams, such as @code{fputc}. +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun ssize_t pwrite (int @var{filedes}, const void *@var{buffer}, size_t @var{size}, off_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is usually a safe syscall. The sysdeps/posix fallback emulation +@c is not MT-Safe because it uses lseek, write and lseek back, but is it +@c used anywhere? +The @code{pwrite} function is similar to the @code{write} function. The +first three arguments are identical, and the return values and error codes +also correspond. + +The difference is the fourth argument and its handling. The data block +is not written to the current position of the file descriptor +@code{filedes}. Instead the data is written to the file starting at +position @var{offset}. The position of the file descriptor itself is +not affected by the operation. The value is the same as before the call. 
+ +However, on Linux, if a file is opened with @code{O_APPEND}, @code{pwrite} +appends data to the end of the file, regardless of the value of +@code{offset}. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{pwrite} function is in fact @code{pwrite64} and the type +@code{off_t} has 64 bits, which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value of @code{pwrite} describes the number of written bytes. +In the error case it returns @math{-1} like @code{write} does and the +error codes are also the same, with these additions: + +@table @code +@item EINVAL +The value given for @var{offset} is negative and therefore illegal. + +@item ESPIPE +The file descriptor @var{filedes} is associated with a pipe or a FIFO and +this device does not allow positioning of the file pointer. +@end table + +The function is an extension defined in the Unix Single Specification +version 2. +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun ssize_t pwrite64 (int @var{filedes}, const void *@var{buffer}, size_t @var{size}, off64_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is usually a safe syscall. The sysdeps/posix fallback emulation +@c is not MT-Safe because it uses lseek64, write and lseek64 back, but +@c is it used anywhere? +This function is similar to the @code{pwrite} function. The difference +is that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t} which makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The +file descriptor @code{filedes} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode. 
+ +When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{pwrite} and so transparently replaces the 32 bit interface. +@end deftypefun + +@comment sys/uio.h +@comment BSD +@deftypefun ssize_t preadv (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux 3.2 for all architectures but microblaze +@c (which was added on 3.15). The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls pread, and it is now a syscall on all +@c targets. + +This function is similar to the @code{readv} function, with the difference +that it adds an extra @var{offset} parameter of type @code{off_t} similar to +@code{pread}. The data is read from the file starting at position +@var{offset}. The position of the file descriptor itself is not affected +by the operation. The value is the same as before the call. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{preadv} function is in fact @code{preadv64} and the type +@code{off_t} has 64 bits, which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value is a count of bytes (@emph{not} buffers) read, @math{0} +indicating end-of-file, or @math{-1} indicating an error. The possible +errors are the same as in @code{readv} and @code{pread}. +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun ssize_t preadv64 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux 3.2 for all architectures but microblaze +@c (which was added on 3.15). The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls pread64, and it is now a syscall on all +@c targets.
+ +This function is similar to the @code{preadv} function with the difference +that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t}. It makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The +file descriptor @var{fd} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode. + +When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{preadv} and so transparently replaces the 32 bit interface. +@end deftypefun + +@comment sys/uio.h +@comment BSD +@deftypefun ssize_t pwritev (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux 3.2 for all architectures but microblaze +@c (which was added on 3.15). The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls pwrite, and it is now a syscall on all +@c targets. + +This function is similar to the @code{writev} function, with the difference +that it adds an extra @var{offset} parameter of type @code{off_t} similar to +@code{pwrite}. The data is written to the file starting at position +@var{offset}. The position of the file descriptor itself is not affected +by the operation. The value is the same as before the call. + +However, on Linux, if a file is opened with @code{O_APPEND}, @code{pwritev} +appends data to the end of the file, regardless of the value of +@var{offset}. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{pwritev} function is in fact @code{pwritev64} and the type +@code{off_t} has 64 bits, which makes it possible to handle files up to +@twoexp{63} bytes in length.
+ +The return value is a count of bytes (@emph{not} buffers) written, @math{0} +indicating end-of-file, or @math{-1} indicating an error. The possible +errors are the same as in @code{writev} and @code{pwrite}. +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun ssize_t pwritev64 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux 3.2 for all architectures but microblaze +@c (which was added on 3.15). The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls pwrite64, and it is now a syscall on all +@c targets. + +This function is similar to the @code{pwritev} function with the difference +that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t}. It makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The +file descriptor @var{fd} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode. + +When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{pwritev} and so transparently replaces the 32 bit interface. +@end deftypefun + +@comment sys/uio.h +@comment GNU +@deftypefun ssize_t preadv2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls preadv. + +This function is similar to the @code{preadv} function, with the difference +that it adds an extra @var{flags} parameter of type @code{int}. The supported +@var{flags} are dependent on the underlying system. For Linux it supports: + +@vtable @code +@item RWF_HIPRI +High priority request.
This adds a flag that tells the file system that +this is a high priority request for which it is worth polling the hardware. +The flag is purely advisory and can be ignored if not supported. The +@var{fd} must be opened using @code{O_DIRECT}. + +@item RWF_DSYNC +Per-IO synchronization as if the file was opened with the @code{O_DSYNC} flag. + +@item RWF_SYNC +Per-IO synchronization as if the file was opened with the @code{O_SYNC} flag. +@end vtable + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{preadv2} function is in fact @code{preadv64v2} and the type +@code{off_t} has 64 bits, which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value is a count of bytes (@emph{not} buffers) read, @math{0} +indicating end-of-file, or @math{-1} indicating an error. The possible +errors are the same as in @code{preadv} with the addition of: + +@table @code + +@item EOPNOTSUPP + +@c The default sysdeps/posix code will return it for any flags value +@c different than 0. +An unsupported @var{flags} was used. + +@end table + +@end deftypefun + +@comment unistd.h +@comment GNU +@deftypefun ssize_t preadv64v2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls preadv. + +This function is similar to the @code{preadv2} function with the difference +that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t}. It makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The +file descriptor @var{fd} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode.
+ +When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{preadv2} and so transparently replaces the 32 bit interface. +@end deftypefun + + +@comment sys/uio.h +@comment GNU +@deftypefun ssize_t pwritev2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off_t @var{offset}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls pwritev. + +This function is similar to the @code{pwritev} function, with the difference +that it adds an extra @var{flags} parameter of type @code{int}. The supported +@var{flags} are dependent on the underlying system and for Linux it supports +the same ones as for @code{preadv2}. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{pwritev2} function is in fact @code{pwritev64v2} and the type +@code{off_t} has 64 bits, which makes it possible to handle files up to +@twoexp{63} bytes in length. + +The return value is a count of bytes (@emph{not} buffers) written, @math{0} +indicating end-of-file, or @math{-1} indicating an error. The possible +errors are the same as in @code{preadv2}. +@end deftypefun + +@comment unistd.h +@comment GNU +@deftypefun ssize_t pwritev64v2 (int @var{fd}, const struct iovec *@var{iov}, int @var{iovcnt}, off64_t @var{offset}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This is a syscall for Linux v4.6. The sysdeps/posix fallback emulation +@c is also MT-Safe since it calls pwritev. + +This function is similar to the @code{pwritev2} function with the difference +that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t}. It makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes.
The +file descriptor @code{filedes} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode. + +When the source file is compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{pwritev2} and so transparently replaces the 32 bit interface. +@end deftypefun + + +@node File Position Primitive +@section Setting the File Position of a Descriptor + +Just as you can set the file position of a stream with @code{fseek}, you +can set the file position of a descriptor with @code{lseek}. This +specifies the position in the file for the next @code{read} or +@code{write} operation. @xref{File Positioning}, for more information +on the file position and what it means. + +To read the current file position value from a descriptor, use +@code{lseek (@var{desc}, 0, SEEK_CUR)}. + +@cindex file positioning on a file descriptor +@cindex positioning a file descriptor +@cindex seeking on a file descriptor +@comment unistd.h +@comment POSIX.1 +@deftypefun off_t lseek (int @var{filedes}, off_t @var{offset}, int @var{whence}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{lseek} function is used to change the file position of the +file with descriptor @var{filedes}. + +The @var{whence} argument specifies how the @var{offset} should be +interpreted, in the same way as for the @code{fseek} function, and it must +be one of the symbolic constants @code{SEEK_SET}, @code{SEEK_CUR}, or +@code{SEEK_END}. + +@vtable @code +@item SEEK_SET +Specifies that @var{offset} is a count of characters from the beginning +of the file. + +@item SEEK_CUR +Specifies that @var{offset} is a count of characters from the current +file position. This count may be positive or negative. + +@item SEEK_END +Specifies that @var{offset} is a count of characters from the end of +the file. 
A negative count specifies a position within the current +extent of the file; a positive count specifies a position past the +current end. If you set the position past the current end, and +actually write data, you will extend the file with zeros up to that +position. +@end vtable + +The return value from @code{lseek} is normally the resulting file +position, measured in bytes from the beginning of the file. +You can use this feature together with @code{SEEK_CUR} to read the +current file position. + +If you want to append to the file, setting the file position to the +current end of file with @code{SEEK_END} is not sufficient. Another +process may write more data after you seek but before you write, +extending the file so the position you write onto clobbers their data. +Instead, use the @code{O_APPEND} operating mode; @pxref{Operating Modes}. + +You can set the file position past the current end of the file. This +does not by itself make the file longer; @code{lseek} never changes the +file. But subsequent output at that position will extend the file. +Characters between the previous end of file and the new position are +filled with zeros. Extending the file in this way can create a +``hole'': the blocks of zeros are not actually allocated on disk, so the +file takes up less space than it appears to; it is then called a +``sparse file''. +@cindex sparse files +@cindex holes in files + +If the file position cannot be changed, or the operation is in some way +invalid, @code{lseek} returns a value of @math{-1}. The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} is not a valid file descriptor. + +@item EINVAL +The @var{whence} argument value is not valid, or the resulting +file offset is not valid. A file offset is invalid. + +@item ESPIPE +The @var{filedes} corresponds to an object that cannot be positioned, +such as a pipe, FIFO or terminal device. 
(POSIX.1 specifies this error +only for pipes and FIFOs, but on @gnusystems{}, you always get +@code{ESPIPE} if the object is not seekable.) +@end table + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} the +@code{lseek} function is in fact @code{lseek64} and the type +@code{off_t} has 64 bits which makes it possible to handle files up to +@twoexp{63} bytes in length. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{lseek} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{lseek} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The @code{lseek} function is the underlying primitive for the +@code{fseek}, @code{fseeko}, @code{ftell}, @code{ftello} and +@code{rewind} functions, which operate on streams instead of file +descriptors. +@end deftypefun + +@comment unistd.h +@comment Unix98 +@deftypefun off64_t lseek64 (int @var{filedes}, off64_t @var{offset}, int @var{whence}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to the @code{lseek} function. The difference +is that the @var{offset} parameter is of type @code{off64_t} instead of +@code{off_t} which makes it possible on 32 bit machines to address +files larger than @twoexp{31} bytes and up to @twoexp{63} bytes. The +file descriptor @var{filedes} must be opened using @code{open64} since +otherwise the large offsets possible with @code{off64_t} will lead to +errors with a descriptor in small file mode. + +When the source file is compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is actually available under the name +@code{lseek} and so transparently replaces the 32 bit interface.
+@end deftypefun + +You can have multiple descriptors for the same file if you open the file +more than once, or if you duplicate a descriptor with @code{dup}. +Descriptors that come from separate calls to @code{open} have independent +file positions; using @code{lseek} on one descriptor has no effect on the +other. For example, + +@smallexample +@group +@{ + int d1, d2; + char buf[4]; + d1 = open ("foo", O_RDONLY); + d2 = open ("foo", O_RDONLY); + lseek (d1, 1024, SEEK_SET); + read (d2, buf, 4); +@} +@end group +@end smallexample + +@noindent +will read the first four characters of the file @file{foo}. (The +error-checking code necessary for a real program has been omitted here +for brevity.) + +By contrast, descriptors made by duplication share a common file +position with the original descriptor that was duplicated. Anything +which alters the file position of one of the duplicates, including +reading or writing data, affects all of them alike. Thus, for example, + +@smallexample +@{ + int d1, d2, d3; + char buf1[4], buf2[4]; + d1 = open ("foo", O_RDONLY); + d2 = dup (d1); + d3 = dup (d2); + lseek (d3, 1024, SEEK_SET); + read (d1, buf1, 4); + read (d2, buf2, 4); +@} +@end smallexample + +@noindent +will read four characters starting with the 1024'th character of +@file{foo}, and then four more characters starting with the 1028'th +character. + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} off_t +This is a signed integer type used to represent file sizes. In +@theglibc{}, this type is no narrower than @code{int}. + +If the source is compiled with @code{_FILE_OFFSET_BITS == 64} this type +is transparently replaced by @code{off64_t}. +@end deftp + +@comment sys/types.h +@comment Unix98 +@deftp {Data Type} off64_t +This type is used similar to @code{off_t}. The difference is that even +on 32 bit machines, where the @code{off_t} type would have 32 bits, +@code{off64_t} has 64 bits and so is able to address files up to +@twoexp{63} bytes in length. 
+ +When compiling with @code{_FILE_OFFSET_BITS == 64} this type is +available under the name @code{off_t}. +@end deftp + +These aliases for the @samp{SEEK_@dots{}} constants exist for the sake +of compatibility with older BSD systems. They are defined in two +different header files: @file{fcntl.h} and @file{sys/file.h}. + +@vtable @code +@item L_SET +An alias for @code{SEEK_SET}. + +@item L_INCR +An alias for @code{SEEK_CUR}. + +@item L_XTND +An alias for @code{SEEK_END}. +@end vtable + +@node Descriptors and Streams +@section Descriptors and Streams +@cindex streams, and file descriptors +@cindex converting file descriptor to stream +@cindex extracting file descriptor from stream + +Given an open file descriptor, you can create a stream for it with the +@code{fdopen} function. You can get the underlying file descriptor for +an existing stream with the @code{fileno} function. These functions are +declared in the header file @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment POSIX.1 +@deftypefun {FILE *} fdopen (int @var{filedes}, const char *@var{opentype}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @aculock{}}} +The @code{fdopen} function returns a new stream for the file descriptor +@var{filedes}. + +The @var{opentype} argument is interpreted in the same way as for the +@code{fopen} function (@pxref{Opening Streams}), except that +the @samp{b} option is not permitted; this is because @gnusystems{} make no +distinction between text and binary files. Also, @code{"w"} and +@code{"w+"} do not cause truncation of the file; these have an effect only +when opening a file, and in this case the file has already been opened. +You must make sure that the @var{opentype} argument matches the actual +mode of the open file descriptor. + +The return value is the new stream. 
If the stream cannot be created +(for example, if the modes for the file indicated by the file descriptor +do not permit the access specified by the @var{opentype} argument), a +null pointer is returned instead. + +In some other systems, @code{fdopen} may fail to detect that the modes +for file descriptors do not permit the access specified by +@var{opentype}. @Theglibc{} always checks for this. +@end deftypefun + +For an example showing the use of the @code{fdopen} function, +see @ref{Creating a Pipe}. + +@comment stdio.h +@comment POSIX.1 +@deftypefun int fileno (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns the file descriptor associated with the stream +@var{stream}. If an error is detected (for example, if the @var{stream} +is not valid) or if @var{stream} does not do I/O to a file, +@code{fileno} returns @math{-1}. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int fileno_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fileno_unlocked} function is equivalent to the @code{fileno} +function except that it does not implicitly lock the stream if the state +is @code{FSETLOCKING_INTERNAL}. + +This function is a GNU extension. +@end deftypefun + +@cindex standard file descriptors +@cindex file descriptors, standard +There are also symbolic constants defined in @file{unistd.h} for the +file descriptors belonging to the standard streams @code{stdin}, +@code{stdout}, and @code{stderr}; see @ref{Standard Streams}. +@pindex unistd.h + +@vtable @code +@comment unistd.h +@comment POSIX.1 +@item STDIN_FILENO +This macro has value @code{0}, which is the file descriptor for +standard input. +@cindex standard input file descriptor + +@comment unistd.h +@comment POSIX.1 +@item STDOUT_FILENO +This macro has value @code{1}, which is the file descriptor for +standard output.
+@cindex standard output file descriptor + +@comment unistd.h +@comment POSIX.1 +@item STDERR_FILENO +This macro has value @code{2}, which is the file descriptor for +standard error output. +@end vtable +@cindex standard error file descriptor + +@node Stream/Descriptor Precautions +@section Dangers of Mixing Streams and Descriptors +@cindex channels +@cindex streams and descriptors +@cindex descriptors and streams +@cindex mixing descriptors and streams + +You can have multiple file descriptors and streams (let's call both +streams and descriptors ``channels'' for short) connected to the same +file, but you must take care to avoid confusion between channels. There +are two cases to consider: @dfn{linked} channels that share a single +file position value, and @dfn{independent} channels that have their own +file positions. + +It's best to use just one channel in your program for actual data +transfer to any given file, except when all the access is for input. +For example, if you open a pipe (something you can only do at the file +descriptor level), either do all I/O with the descriptor, or construct a +stream from the descriptor with @code{fdopen} and then do all I/O with +the stream. + +@menu +* Linked Channels:: Dealing with channels sharing a file position. +* Independent Channels:: Dealing with separately opened, unlinked channels. +* Cleaning Streams:: Cleaning a stream makes it safe to use + another channel. +@end menu + +@node Linked Channels +@subsection Linked Channels +@cindex linked channels + +Channels that come from a single opening share the same file position; +we call them @dfn{linked} channels. Linked channels result when you +make a stream from a descriptor using @code{fdopen}, when you get a +descriptor from a stream with @code{fileno}, when you copy a descriptor +with @code{dup} or @code{dup2}, and when descriptors are inherited +during @code{fork}. 
For files that don't support random access, such as +terminals and pipes, @emph{all} channels are effectively linked. On +random-access files, all append-type output streams are effectively +linked to each other. + +@cindex cleaning up a stream +If you have been using a stream for I/O (or have just opened the stream), +and you want to do I/O using +another channel (either a stream or a descriptor) that is linked to it, +you must first @dfn{clean up} the stream that you have been using. +@xref{Cleaning Streams}. + +Terminating a process, or executing a new program in the process, +destroys all the streams in the process. If descriptors linked to these +streams persist in other processes, their file positions become +undefined as a result. To prevent this, you must clean up the streams +before destroying them. + +@node Independent Channels +@subsection Independent Channels +@cindex independent channels + +When you open channels (streams or descriptors) separately on a seekable +file, each channel has its own file position. These are called +@dfn{independent channels}. + +The system handles each channel independently. Most of the time, this +is quite predictable and natural (especially for input): each channel +can read or write sequentially at its own place in the file. However, +if some of the channels are streams, you must take these precautions: + +@itemize @bullet +@item +You should clean an output stream after use, before doing anything else +that might read or write from the same part of the file. + +@item +You should clean an input stream before reading data that may have been +modified using an independent channel. Otherwise, you might read +obsolete data that had been in the stream's buffer. +@end itemize + +If you do output to one channel at the end of the file, this will +certainly leave the other independent channels positioned somewhere +before the new end. 
You cannot reliably set their file positions to the +new end of file before writing, because the file can always be extended +by another process between when you set the file position and when you +write the data. Instead, use an append-type descriptor or stream; they +always output at the current end of the file. In order to make the +end-of-file position accurate, you must clean the output channel you +were using, if it is a stream. + +It's impossible for two channels to have separate file pointers for a +file that doesn't support random access. Thus, channels for reading or +writing such files are always linked, never independent. Append-type +channels are also always linked. For these channels, follow the rules +for linked channels; see @ref{Linked Channels}. + +@node Cleaning Streams +@subsection Cleaning Streams + +You can use @code{fflush} to clean a stream in most +cases. + +You can skip the @code{fflush} if you know the stream +is already clean. A stream is clean whenever its buffer is empty. For +example, an unbuffered stream is always clean. An input stream that is +at end-of-file is clean. A line-buffered stream is clean when the last +character output was a newline. However, a just-opened input stream +might not be clean, as its input buffer might not be empty. + +There is one case in which cleaning a stream is impossible on most +systems. This is when the stream is doing input from a file that is not +random-access. Such streams typically read ahead, and when the file is +not random access, there is no way to give back the excess data already +read. When an input stream reads from a random-access file, +@code{fflush} does clean the stream, but leaves the file pointer at an +unpredictable place; you must set the file pointer before doing any +further I/O. + +Closing an output-only stream also does @code{fflush}, so this is a +valid way of cleaning an output stream. 
+ +You need not clean a stream before using its descriptor for control +operations such as setting terminal modes; these operations don't affect +the file position and are not affected by it. You can use any +descriptor for these operations, and all channels are affected +simultaneously. However, text already ``output'' to a stream but still +buffered by the stream will be subject to the new terminal modes when +subsequently flushed. To make sure ``past'' output is covered by the +terminal settings that were in effect at the time, flush the output +streams for that terminal before setting the modes. @xref{Terminal +Modes}. + +@node Scatter-Gather +@section Fast Scatter-Gather I/O +@cindex scatter-gather + +Some applications may need to read or write data to multiple buffers, +which are separated in memory. Although this can be done easily enough +with multiple calls to @code{read} and @code{write}, it is inefficient +because there is overhead associated with each kernel call. + +Instead, many platforms provide special high-speed primitives to perform +these @dfn{scatter-gather} operations in a single kernel call. @Theglibc{} +will provide an emulation on any system that lacks these +primitives, so they are not a portability threat. They are defined in +@code{sys/uio.h}. + +These functions are controlled with arrays of @code{iovec} structures, +which describe the location and size of each buffer. + +@comment sys/uio.h +@comment BSD +@deftp {Data Type} {struct iovec} + +The @code{iovec} structure describes a buffer. It contains two fields: + +@table @code + +@item void *iov_base +Contains the address of a buffer. + +@item size_t iov_len +Contains the length of the buffer. 
+ +@end table +@end deftp + +@comment sys/uio.h +@comment BSD +@deftypefun ssize_t readv (int @var{filedes}, const struct iovec *@var{vector}, int @var{count}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c The fallback sysdeps/posix implementation, used even on GNU/Linux +@c with old kernels that lack a full readv/writev implementation, may +@c malloc the buffer into which data is read, if the total read size is +@c too large for alloca. + +The @code{readv} function reads data from @var{filedes} and scatters it +into the buffers described in @var{vector}, which is taken to be +@var{count} structures long. As each buffer is filled, data is sent to the +next. + +Note that @code{readv} is not guaranteed to fill all the buffers. +It may stop at any point, for the same reasons @code{read} would. + +The return value is a count of bytes (@emph{not} buffers) read, @math{0} +indicating end-of-file, or @math{-1} indicating an error. The possible +errors are the same as in @code{read}. + +@end deftypefun + +@comment sys/uio.h +@comment BSD +@deftypefun ssize_t writev (int @var{filedes}, const struct iovec *@var{vector}, int @var{count}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c The fallback sysdeps/posix implementation, used even on GNU/Linux +@c with old kernels that lack a full readv/writev implementation, may +@c malloc the buffer from which data is written, if the total write size +@c is too large for alloca. + +The @code{writev} function gathers data from the buffers described in +@var{vector}, which is taken to be @var{count} structures long, and writes +them to @var{filedes}. As each buffer is written, it moves on to the +next. + +Like @code{readv}, @code{writev} may stop midstream under the same +conditions @code{write} would. + +The return value is a count of bytes written, or @math{-1} indicating an +error. The possible errors are the same as in @code{write}.
+ +@end deftypefun + +@c Note - I haven't read this anywhere. I surmised it from my knowledge +@c of computer science. Thus, there could be subtleties I'm missing. + +Note that if the buffers are small (under about 1kB), high-level streams +may be easier to use than these functions. However, @code{readv} and +@code{writev} are more efficient when the individual buffers themselves +(as opposed to the total output) are large. In that case, a high-level +stream would not be able to cache the data efficiently. + +@node Memory-mapped I/O +@section Memory-mapped I/O + +On modern operating systems, it is possible to @dfn{mmap} (pronounced +``em-map'') a file to a region of memory. When this is done, the file can +be accessed just like an array in the program. + +This is more efficient than @code{read} or @code{write}, as only the regions +of the file that a program actually accesses are loaded. Accesses to +not-yet-loaded parts of the mmapped region are handled in the same way as +swapped out pages. + +Since mmapped pages can be stored back to their file when physical +memory is low, it is possible to mmap files orders of magnitude larger +than both the physical memory @emph{and} swap space. The only limit is +address space. The theoretical limit is 4GB on a 32-bit machine - +however, the actual limit will be smaller since some areas will be +reserved for other purposes. If the LFS interface is used the file size +on 32-bit systems is not limited to 2GB (offsets are signed which +reduces the addressable area of 4GB by half); the full 64 bits are +available. + +Memory mapping only works on entire pages of memory. Thus, addresses +for mapping must be page-aligned, and length values will be rounded up. +To determine the size of a page the machine uses one should use + +@vindex _SC_PAGESIZE +@smallexample +size_t page_size = (size_t) sysconf (_SC_PAGESIZE); +@end smallexample + +@noindent +These functions are declared in @file{sys/mman.h}.
+ +@comment sys/mman.h +@comment POSIX +@deftypefun {void *} mmap (void *@var{address}, size_t @var{length}, int @var{protect}, int @var{flags}, int @var{filedes}, off_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The @code{mmap} function creates a new mapping, connected to bytes +(@var{offset}) to (@var{offset} + @var{length} - 1) in the file open on +@var{filedes}. A new reference for the file specified by @var{filedes} +is created, which is not removed by closing the file. + +@var{address} gives a preferred starting address for the mapping. +@code{NULL} expresses no preference. Any previous mapping at that +address is automatically removed. The address you give may still be +changed, unless you use the @code{MAP_FIXED} flag. + +@vindex PROT_READ +@vindex PROT_WRITE +@vindex PROT_EXEC +@var{protect} contains flags that control what kind of access is +permitted. They include @code{PROT_READ}, @code{PROT_WRITE}, and +@code{PROT_EXEC}, which permit reading, writing, and execution, +respectively. Inappropriate access will cause a segfault (@pxref{Program +Error Signals}). + +Note that most hardware designs cannot support write permission without +read permission, and many do not distinguish read and execute permission. +Thus, you may receive wider permissions than you ask for, and mappings of +write-only files may be denied even if you do not use @code{PROT_READ}. + +@var{flags} contains flags that control the nature of the map. +One of @code{MAP_SHARED} or @code{MAP_PRIVATE} must be specified. + +They include: + +@vtable @code +@item MAP_PRIVATE +This specifies that writes to the region should never be written back +to the attached file. Instead, a copy is made for the process, and the +region will be swapped normally if memory runs low. No other process will +see the changes. 
+ +Since private mappings effectively revert to ordinary memory +when written to, you must have enough virtual memory for a copy of +the entire mmapped region if you use this mode with @code{PROT_WRITE}. + +@item MAP_SHARED +This specifies that writes to the region will be written back to the +file. Changes made will be shared immediately with other processes +mmapping the same file. + +Note that actual writing may take place at any time. You need to use +@code{msync}, described below, if it is important that other processes +using conventional I/O get a consistent view of the file. + +@item MAP_FIXED +This forces the system to use the exact mapping address specified in +@var{address} and fail if it can't. + +@c One of these is official - the other is obviously an obsolete synonym +@c Which is which? +@item MAP_ANONYMOUS +@itemx MAP_ANON +This flag tells the system to create an anonymous mapping, not connected +to a file. @var{filedes} and @var{offset} are ignored, and the region is +initialized with zeros. + +Anonymous maps are used as the basic primitive to extend the heap on some +systems. They are also useful to share data between multiple tasks +without creating a file. + +On some systems using private anonymous mmaps is more efficient than using +@code{malloc} for large blocks. This is not an issue with @theglibc{}, +as the included @code{malloc} automatically uses @code{mmap} where appropriate. + +@c Linux has some other MAP_ options, which I have not discussed here. +@c MAP_DENYWRITE, MAP_EXECUTABLE and MAP_GROWSDOWN don't seem applicable to +@c user programs (and I don't understand the last two). MAP_LOCKED does +@c not appear to be implemented. + +@end vtable + +@code{mmap} returns the address of the new mapping, or +@code{MAP_FAILED} for an error. + +Possible errors include: + +@table @code + +@item EINVAL + +Either @var{address} was unusable, or inconsistent @var{flags} were +given.
+ +@item EACCES + +@var{filedes} was not open for the type of access specified in @var{protect}. + +@item ENOMEM + +Either there is not enough memory for the operation, or the process is +out of address space. + +@item ENODEV + +This file is of a type that doesn't support mapping. + +@item ENOEXEC + +The file is on a filesystem that doesn't support mapping. + +@c On Linux, EAGAIN will appear if the file has a conflicting mandatory lock. +@c However mandatory locks are not discussed in this manual. +@c +@c Similarly, ETXTBSY will occur if the MAP_DENYWRITE flag (not documented +@c here) is used and the file is already open for writing. + +@end table + +@end deftypefun + +@comment sys/mman.h +@comment LFS +@deftypefun {void *} mmap64 (void *@var{address}, size_t @var{length}, int @var{protect}, int @var{flags}, int @var{filedes}, off64_t @var{offset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c The page_shift auto detection when MMAP2_PAGE_SHIFT is -1 (it never +@c is) would be thread-unsafe. +The @code{mmap64} function is equivalent to the @code{mmap} function but +the @var{offset} parameter is of type @code{off64_t}. On 32-bit systems +this allows the file associated with the @var{filedes} descriptor to be +larger than 2GB. @var{filedes} must be a descriptor returned from a +call to @code{open64} or @code{fopen64} and @code{freopen64} where the +descriptor is retrieved with @code{fileno}. + +When the sources are translated with @code{_FILE_OFFSET_BITS == 64} this +function is actually available under the name @code{mmap}. I.e., the +new, extended API using 64 bit file sizes and offsets transparently +replaces the old API. +@end deftypefun + +@comment sys/mman.h +@comment POSIX +@deftypefun int munmap (void *@var{addr}, size_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{munmap} removes any memory maps from (@var{addr}) to (@var{addr} + +@var{length}). @var{length} should be the length of the mapping. 
+ +It is safe to unmap multiple mappings in one command, or include unmapped +space in the range. It is also possible to unmap only part of an existing +mapping. However, only entire pages can be removed. If @var{length} is not +a whole number of pages, it will be rounded up. + +It returns @math{0} for success and @math{-1} for an error. + +One error is possible: + +@table @code + +@item EINVAL +The memory range given was outside the user mmap range or wasn't page +aligned. + +@end table + +@end deftypefun + +@comment sys/mman.h +@comment POSIX +@deftypefun int msync (void *@var{address}, size_t @var{length}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +When using shared mappings, the kernel can write the file at any time +before the mapping is removed. To be certain data has actually been +written to the file and will be accessible to non-memory-mapped I/O, it +is necessary to use this function. + +It operates on the region @var{address} to (@var{address} + @var{length}). +It may be used on part of a mapping or multiple mappings, however the +region given should not contain any unmapped space. + +@var{flags} can contain some options: + +@vtable @code + +@item MS_SYNC + +This flag makes sure the data is actually written @emph{to disk}. +Normally @code{msync} only makes sure that accesses to a file with +conventional I/O reflect the recent changes. + +@item MS_ASYNC + +This tells @code{msync} to begin the synchronization, but not to wait for +it to complete. + +@c Linux also has MS_INVALIDATE, which I don't understand. + +@end vtable + +@code{msync} returns @math{0} for success and @math{-1} for +error. Errors include: + +@table @code + +@item EINVAL +An invalid region was given, or the @var{flags} were invalid. + +@item EFAULT +There is no existing mapping in at least part of the given region.
+ +@end table + +@end deftypefun + +@comment sys/mman.h +@comment GNU +@deftypefun {void *} mremap (void *@var{address}, size_t @var{length}, size_t @var{new_length}, int @var{flag}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This function can be used to change the size of an existing memory +area. @var{address} and @var{length} must cover a region entirely mapped +in the same @code{mmap} statement. A new mapping with the same +characteristics will be returned with the length @var{new_length}. + +One option is possible, @code{MREMAP_MAYMOVE}. If it is given in +@var{flag}, the system may remove the existing mapping and create a new +one of the desired length in another location. + +The address of the resulting mapping is returned, or @math{-1} on error. Possible +error codes include: + +@table @code + +@item EFAULT +There is no existing mapping in at least part of the original region, or +the region covers two or more distinct mappings. + +@item EINVAL +The address given is misaligned or inappropriate. + +@item EAGAIN +The region has pages locked, and if extended it would exceed the +process's resource limit for locked pages. @xref{Limits on Resources}. + +@item ENOMEM +The region is private writable, and insufficient virtual memory is +available to extend it. Also, this error will occur if +@code{MREMAP_MAYMOVE} is not given and the extension would collide with +another mapped region. + +@end table +@end deftypefun + +This function is only available on a few systems. Except for performing +optional optimizations one should not rely on this function. + +Not all file descriptors may be mapped. Sockets, pipes, and most devices +only allow sequential access and do not fit into the mapping abstraction. +In addition, some regular files may not be mmapable, and older kernels may +not support mapping at all. Thus, programs using @code{mmap} should +have a fallback method to use should it fail. @xref{Mmap,,,standards,GNU +Coding Standards}.
+ +@comment sys/mman.h +@comment POSIX +@deftypefun int madvise (void *@var{addr}, size_t @var{length}, int @var{advice}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This function can be used to provide the system with @var{advice} about +the intended usage patterns of the memory region starting at @var{addr} +and extending @var{length} bytes. + +The valid BSD values for @var{advice} are: + +@vtable @code + +@item MADV_NORMAL +The region should receive no further special treatment. + +@item MADV_RANDOM +The region will be accessed via random page references. The kernel +should page-in the minimal number of pages for each page fault. + +@item MADV_SEQUENTIAL +The region will be accessed via sequential page references. This +may cause the kernel to aggressively read-ahead, expecting further +sequential references after any page fault within this region. + +@item MADV_WILLNEED +The region will be needed. The pages within this region may +be pre-faulted in by the kernel. + +@item MADV_DONTNEED +The region is no longer needed. The kernel may free these pages, +causing any changes to the pages to be lost, as well as swapped +out pages to be discarded. + +@end vtable + +The POSIX names are slightly different, but with the same meanings: + +@vtable @code + +@item POSIX_MADV_NORMAL +This corresponds with BSD's @code{MADV_NORMAL}. + +@item POSIX_MADV_RANDOM +This corresponds with BSD's @code{MADV_RANDOM}. + +@item POSIX_MADV_SEQUENTIAL +This corresponds with BSD's @code{MADV_SEQUENTIAL}. + +@item POSIX_MADV_WILLNEED +This corresponds with BSD's @code{MADV_WILLNEED}. + +@item POSIX_MADV_DONTNEED +This corresponds with BSD's @code{MADV_DONTNEED}. + +@end vtable + +@code{madvise} returns @math{0} for success and @math{-1} for +error. Errors include: +@table @code + +@item EINVAL +An invalid region was given, or the @var{advice} was invalid. + +@item EFAULT +There is no existing mapping in at least part of the given region. 
+ +@end table +@end deftypefun + +@comment sys/mman.h +@comment POSIX +@deftypefn Function int shm_open (const char *@var{name}, int @var{oflag}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asuinit{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c shm_open @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_once(where_is_shmfs) @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd +@c where_is_shmfs @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c statfs dup ok +@c setmntent dup @ascuheap @asulock @acsmem @acsfd @aculock +@c getmntent_r dup @mtslocale @ascuheap @aculock @acsmem [no @asucorrupt @acucorrupt; exclusive stream] +@c strcmp dup ok +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c mempcpy dup ok +@c endmntent dup @ascuheap @asulock @aculock @acsmem @acsfd +@c strlen dup ok +@c strchr dup ok +@c mempcpy dup ok +@c open dup @acsfd +@c fcntl dup ok +@c close dup @acsfd + +This function returns a file descriptor that can be used to allocate shared +memory via @code{mmap}. Unrelated processes can use the same @var{name} to create or +open existing shared memory objects. + +The @var{name} argument specifies the shared memory object to be opened. +In @theglibc{} it must be a string smaller than @code{NAME_MAX} bytes starting +with an optional slash but containing no other slashes. + +The semantics of the @var{oflag} and @var{mode} arguments are the same as in @code{open}. + +@code{shm_open} returns the file descriptor on success or @math{-1} on error. +On failure @code{errno} is set.
+@end deftypefn + +@deftypefn Function int shm_unlink (const char *@var{name}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asuinit{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c shm_unlink @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_once(where_is_shmfs) dup @mtslocale @asuinit @ascuheap @asulock @aculock @acsmem @acsfd +@c strlen dup ok +@c strchr dup ok +@c mempcpy dup ok +@c unlink dup ok + +This function is the inverse of @code{shm_open} and removes the object with +the given @var{name} previously created by @code{shm_open}. + +@code{shm_unlink} returns @math{0} on success or @math{-1} on error. +On failure @code{errno} is set. +@end deftypefn + +@node Waiting for I/O +@section Waiting for Input or Output +@cindex waiting for input or output +@cindex multiplexing input +@cindex input from multiple files + +Sometimes a program needs to accept input on multiple input channels +whenever input arrives. For example, some workstations may have devices +such as a digitizing tablet, function button box, or dial box that are +connected via normal asynchronous serial interfaces; good user interface +style requires responding immediately to input on any device. Another +example is a program that acts as a server to several other processes +via pipes or sockets. + +You cannot normally use @code{read} for this purpose, because this +blocks the program until input is available on one particular file +descriptor; input on other channels won't wake it up. You could set +nonblocking mode and poll each file descriptor in turn, but this is very +inefficient. + +A better solution is to use the @code{select} function. This blocks the +program until input or output is ready on a specified set of file +descriptors, or until a timer expires, whichever comes first. This +facility is declared in the header file @file{sys/types.h}. 
+@pindex sys/types.h + +In the case of a server socket (@pxref{Listening}), we say that +``input'' is available when there are pending connections that could be +accepted (@pxref{Accepting Connections}). @code{accept} for server +sockets blocks and interacts with @code{select} just as @code{read} does +for normal input. + +@cindex file descriptor sets, for @code{select} +The file descriptor sets for the @code{select} function are specified +as @code{fd_set} objects. Here is the description of the data type +and some macros for manipulating these objects. + +@comment sys/types.h +@comment BSD +@deftp {Data Type} fd_set +The @code{fd_set} data type represents file descriptor sets for the +@code{select} function. It is actually a bit array. +@end deftp + +@comment sys/types.h +@comment BSD +@deftypevr Macro int FD_SETSIZE +The value of this macro is the maximum number of file descriptors that a +@code{fd_set} object can hold information about. On systems with a +fixed maximum number, @code{FD_SETSIZE} is at least that number. On +some systems, including GNU, there is no absolute limit on the number of +descriptors open, but this macro still has a constant value which +controls the number of bits in an @code{fd_set}; if you get a file +descriptor with a value as high as @code{FD_SETSIZE}, you cannot put +that descriptor into an @code{fd_set}. +@end deftypevr + +@comment sys/types.h +@comment BSD +@deftypefn Macro void FD_ZERO (fd_set *@var{set}) +@safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}} +This macro initializes the file descriptor set @var{set} to be the +empty set. +@end deftypefn + +@comment sys/types.h +@comment BSD +@deftypefn Macro void FD_SET (int @var{filedes}, fd_set *@var{set}) +@safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}} +@c Setting a bit isn't necessarily atomic, so there's a potential race +@c here if set is not used exclusively. +This macro adds @var{filedes} to the file descriptor set @var{set}. 
+ +The @var{filedes} parameter must not have side effects since it is +evaluated more than once. +@end deftypefn + +@comment sys/types.h +@comment BSD +@deftypefn Macro void FD_CLR (int @var{filedes}, fd_set *@var{set}) +@safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}} +@c Setting a bit isn't necessarily atomic, so there's a potential race +@c here if set is not used exclusively. +This macro removes @var{filedes} from the file descriptor set @var{set}. + +The @var{filedes} parameter must not have side effects since it is +evaluated more than once. +@end deftypefn + +@comment sys/types.h +@comment BSD +@deftypefn Macro int FD_ISSET (int @var{filedes}, const fd_set *@var{set}) +@safety{@prelim{}@mtsafe{@mtsrace{:set}}@assafe{}@acsafe{}} +This macro returns a nonzero value (true) if @var{filedes} is a member +of the file descriptor set @var{set}, and zero (false) otherwise. + +The @var{filedes} parameter must not have side effects since it is +evaluated more than once. +@end deftypefn + +Next, here is the description of the @code{select} function itself. + +@comment sys/types.h +@comment BSD +@deftypefun int select (int @var{nfds}, fd_set *@var{read-fds}, fd_set *@var{write-fds}, fd_set *@var{except-fds}, struct timeval *@var{timeout}) +@safety{@prelim{}@mtsafe{@mtsrace{:read-fds} @mtsrace{:write-fds} @mtsrace{:except-fds}}@assafe{}@acsafe{}} +@c The select syscall is preferred, but pselect6 may be used instead, +@c which requires converting timeout to a timespec and back. The +@c conversions are not atomic. +The @code{select} function blocks the calling process until there is +activity on any of the specified sets of file descriptors, or until the +timeout period has expired. 
+ +The file descriptors specified by the @var{read-fds} argument are +checked to see if they are ready for reading; the @var{write-fds} file +descriptors are checked to see if they are ready for writing; and the +@var{except-fds} file descriptors are checked for exceptional +conditions. You can pass a null pointer for any of these arguments if +you are not interested in checking for that kind of condition. + +A file descriptor is considered ready for reading if a @code{read} +call will not block. This usually includes the read offset being at +the end of the file or there is an error to report. A server socket +is considered ready for reading if there is a pending connection which +can be accepted with @code{accept}; @pxref{Accepting Connections}. A +client socket is ready for writing when its connection is fully +established; @pxref{Connecting}. + +``Exceptional conditions'' does not mean errors---errors are reported +immediately when an erroneous system call is executed, and do not +constitute a state of the descriptor. Rather, they include conditions +such as the presence of an urgent message on a socket. (@xref{Sockets}, +for information on urgent messages.) + +The @code{select} function checks only the first @var{nfds} file +descriptors. The usual thing is to pass @code{FD_SETSIZE} as the value +of this argument. + +The @var{timeout} specifies the maximum time to wait. If you pass a +null pointer for this argument, it means to block indefinitely until one +of the file descriptors is ready. Otherwise, you should provide the +time in @code{struct timeval} format; see @ref{High-Resolution +Calendar}. Specify zero as the time (a @code{struct timeval} containing +all zeros) if you want to find out which descriptors are ready without +waiting if none are ready. + +The normal return value from @code{select} is the total number of ready file +descriptors in all of the sets. 
Each of the argument sets is overwritten +with information about the descriptors that are ready for the corresponding +operation. Thus, to see if a particular descriptor @var{desc} has input, +use @code{FD_ISSET (@var{desc}, @var{read-fds})} after @code{select} returns. + +If @code{select} returns because the timeout period expires, it returns +a value of zero. + +Any signal will cause @code{select} to return immediately. So if your +program uses signals, you can't rely on @code{select} to keep waiting +for the full time specified. If you want to be sure of waiting for a +particular amount of time, you must check for @code{EINTR} and repeat +the @code{select} with a newly calculated timeout based on the current +time. See the example below. See also @ref{Interrupted Primitives}. + +If an error occurs, @code{select} returns @code{-1} and does not modify +the argument file descriptor sets. The following @code{errno} error +conditions are defined for this function: + +@table @code +@item EBADF +One of the file descriptor sets specified an invalid file descriptor. + +@item EINTR +The operation was interrupted by a signal. @xref{Interrupted Primitives}. + +@item EINVAL +The @var{timeout} argument is invalid; one of the components is negative +or too large. +@end table +@end deftypefun + +@strong{Portability Note:} The @code{select} function is a BSD Unix +feature. + +Here is an example showing how you can use @code{select} to establish a +timeout period for reading from a file descriptor. The @code{input_timeout} +function blocks the calling process until input is available on the +file descriptor, or until the timeout period expires. + +@smallexample +@include select.c.texi +@end smallexample + +There is another example showing the use of @code{select} to multiplex +input from multiple sockets in @ref{Server Example}. 
+ + +@node Synchronizing I/O +@section Synchronizing I/O operations + +@cindex synchronizing +In most modern operating systems, the normal I/O operations are not +executed synchronously. I.e., even if a @code{write} system call +returns, this does not mean the data is actually written to the media, +e.g., the disk. + +In situations where synchronization points are necessary, you can use +special functions which ensure that all operations finish before +they return. + +@comment unistd.h +@comment X/Open +@deftypefun void sync (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +A call to this function will not return as long as there is data which +has not been written to the device. All dirty buffers in the kernel will +be written and so an overall consistent system can be achieved (if no +other process in parallel writes data). + +A prototype for @code{sync} can be found in @file{unistd.h}. +@end deftypefun + +Programs more often want to ensure that data written to a given file is +committed, rather than all data in the system. For this, @code{sync} is overkill. + + +@comment unistd.h +@comment POSIX +@deftypefun int fsync (int @var{fildes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fsync} function can be used to make sure all data associated with +the open file @var{fildes} is written to the device associated with the +descriptor. The function call does not return unless all actions have +finished. + +A prototype for @code{fsync} can be found in @file{unistd.h}. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{fsync} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{fsync} should be +protected using cancellation handlers. 
+@c ref pthread_cleanup_push / pthread_cleanup_pop + +The return value of the function is zero if no error occurred. Otherwise +it is @math{-1} and the global variable @code{errno} is set to one of the +following values: +@table @code +@item EBADF +The descriptor @var{fildes} is not valid. + +@item EINVAL +No synchronization is possible since the system does not implement this. +@end table +@end deftypefun + +Sometimes it is not even necessary to write all data associated with a +file descriptor. E.g., in database files which do not change in size it +is enough to write all the file content data to the device. +Meta-information, like the modification time etc., are not that important +and leaving such information uncommitted does not prevent a successful +recovery of the file in case of a problem. + +@comment unistd.h +@comment POSIX +@deftypefun int fdatasync (int @var{fildes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +When a call to the @code{fdatasync} function returns, it is ensured +that all of the file data is written to the device. For all pending I/O +operations, the parts guaranteeing data integrity have finished. + +Not all systems implement the @code{fdatasync} operation. On systems +missing this functionality @code{fdatasync} is emulated by a call to +@code{fsync} since the performed actions are a superset of those +required by @code{fdatasync}. + +The prototype for @code{fdatasync} is in @file{unistd.h}. + +The return value of the function is zero if no error occurred. Otherwise +it is @math{-1} and the global variable @code{errno} is set to one of the +following values: +@table @code +@item EBADF +The descriptor @var{fildes} is not valid. + +@item EINVAL +No synchronization is possible since the system does not implement this. +@end table +@end deftypefun + + +@node Asynchronous I/O +@section Perform I/O Operations in Parallel + +The POSIX.1b standard defines a new set of I/O operations which can +significantly reduce the time an application spends waiting for I/O.
The +new functions allow a program to initiate one or more I/O operations and +then immediately resume normal work while the I/O operations are +executed in parallel. This functionality is available if the +@file{unistd.h} file defines the symbol @code{_POSIX_ASYNCHRONOUS_IO}. + +These functions are part of the library with realtime functions named +@file{librt}. They are not actually part of the @file{libc} binary. +The implementation of these functions can be done using support in the +kernel (if available) or using an implementation based on threads at +userlevel. In the latter case it might be necessary to link applications +with the thread library @file{libpthread} in addition to @file{librt}. + +All AIO operations operate on files which were opened previously. There +might be arbitrarily many operations running for one file. The +asynchronous I/O operations are controlled using a data structure named +@code{struct aiocb} (@dfn{AIO control block}). It is defined in +@file{aio.h} as follows. + +@comment aio.h +@comment POSIX.1b +@deftp {Data Type} {struct aiocb} +The POSIX.1b standard mandates that the @code{struct aiocb} structure +contains at least the members described in the following table. There +might be more elements which are used by the implementation, but +depending upon these elements is not portable and is highly deprecated. + +@table @code +@item int aio_fildes +This element specifies the file descriptor to be used for the +operation. It must be a legal descriptor, otherwise the operation will +fail. + +The device on which the file is opened must allow the seek operation. +I.e., it is not possible to use any of the AIO operations on devices +like terminals where an @code{lseek} call would lead to an error. + +@item off_t aio_offset +This element specifies the offset in the file at which the operation (input +or output) is performed. 
Since the operations are carried out in arbitrary +order and more than one operation for one file descriptor can be +started, one cannot expect a current read/write position of the file +descriptor. + +@item volatile void *aio_buf +This is a pointer to the buffer with the data to be written or the place +where the read data is stored. + +@item size_t aio_nbytes +This element specifies the length of the buffer pointed to by @code{aio_buf}. + +@item int aio_reqprio +If the platform has defined @code{_POSIX_PRIORITIZED_IO} and +@code{_POSIX_PRIORITY_SCHEDULING}, the AIO requests are +processed based on the current scheduling priority. The +@code{aio_reqprio} element can then be used to lower the priority of the +AIO operation. + +@item struct sigevent aio_sigevent +This element specifies how the calling process is notified once the +operation terminates. If the @code{sigev_notify} element is +@code{SIGEV_NONE}, no notification is sent. If it is @code{SIGEV_SIGNAL}, +the signal determined by @code{sigev_signo} is sent. Otherwise, +@code{sigev_notify} must be @code{SIGEV_THREAD}. In this case, a thread +is created which starts executing the function pointed to by +@code{sigev_notify_function}. + +@item int aio_lio_opcode +This element is only used by the @code{lio_listio} and +@code{lio_listio64} functions. Since these functions allow an +arbitrary number of operations to start at once, and each operation can be +input or output (or nothing), the information must be stored in the +control block. The possible values are: + +@vtable @code +@item LIO_READ +Start a read operation. Read from the file at position +@code{aio_offset} and store the next @code{aio_nbytes} bytes in the +buffer pointed to by @code{aio_buf}. + +@item LIO_WRITE +Start a write operation. Write @code{aio_nbytes} bytes starting at +@code{aio_buf} into the file starting at position @code{aio_offset}. + +@item LIO_NOP +Do nothing for this control block. 
This value is useful sometimes when +an array of @code{struct aiocb} values contains holes, i.e., some of the +values must not be handled although the whole array is presented to the +@code{lio_listio} function. +@end vtable +@end table + +When the sources are compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine, this type is in fact @code{struct aiocb64}, since the LFS +interface transparently replaces the @code{struct aiocb} definition. +@end deftp + +For use with the AIO functions defined in the LFS, there is a similar type +defined which replaces the types of the appropriate members with larger +types but otherwise is equivalent to @code{struct aiocb}. Particularly, +all member names are the same. + +@comment aio.h +@comment POSIX.1b +@deftp {Data Type} {struct aiocb64} +@table @code +@item int aio_fildes +This element specifies the file descriptor which is used for the +operation. It must be a legal descriptor since otherwise the operation +fails for obvious reasons. + +The device on which the file is opened must allow the seek operation. +I.e., it is not possible to use any of the AIO operations on devices +like terminals where an @code{lseek} call would lead to an error. + +@item off64_t aio_offset +This element specifies at which offset in the file the operation (input +or output) is performed. Since the operations are carried out in arbitrary +order and more than one operation for one file descriptor can be +started, one cannot expect a current read/write position of the file +descriptor. + +@item volatile void *aio_buf +This is a pointer to the buffer with the data to be written or the place +where the read data is stored. + +@item size_t aio_nbytes +This element specifies the length of the buffer pointed to by @code{aio_buf}. + +@item int aio_reqprio +If for the platform @code{_POSIX_PRIORITIZED_IO} and +@code{_POSIX_PRIORITY_SCHEDULING} are defined, the AIO requests are +processed based on the current scheduling priority.
The +@code{aio_reqprio} element can then be used to lower the priority of the +AIO operation. + +@item struct sigevent aio_sigevent +This element specifies how the calling process is notified once the +operation terminates. If the @code{sigev_notify} element is +@code{SIGEV_NONE} no notification is sent. If it is @code{SIGEV_SIGNAL}, +the signal determined by @code{sigev_signo} is sent. Otherwise, +@code{sigev_notify} must be @code{SIGEV_THREAD} in which case a thread +is created which starts executing the function pointed to by +@code{sigev_notify_function}. + +@item int aio_lio_opcode +This element is only used by the @code{lio_listio} and +@code{lio_listio64} functions. Since these functions allow an +arbitrary number of operations to start at once, and since each operation can be +input or output (or nothing), the information must be stored in the +control block. See the description of @code{struct aiocb} for a description +of the possible values. +@end table + +When the sources are compiled using @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine, this type is available under the name @code{struct +aiocb64}, since the LFS transparently replaces the old interface. +@end deftp + +@menu +* Asynchronous Reads/Writes:: Asynchronous Read and Write Operations. +* Status of AIO Operations:: Getting the Status of AIO Operations. +* Synchronizing AIO Operations:: Getting into a consistent state. +* Cancel AIO Operations:: Cancellation of AIO Operations. +* Configuration of AIO:: How to optimize the AIO implementation. +@end menu + +@node Asynchronous Reads/Writes +@subsection Asynchronous Read and Write Operations + +@comment aio.h +@comment POSIX.1b +@deftypefun int aio_read (struct aiocb *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +@c Calls aio_enqueue_request. 
+@c aio_enqueue_request @asulock @ascuheap @aculock @acsmem +@c pthread_self ok +@c pthread_getschedparam @asulock @aculock +@c lll_lock (pthread descriptor's lock) @asulock @aculock +@c sched_getparam ok +@c sched_getscheduler ok +@c lll_unlock @aculock +@c pthread_mutex_lock (aio_requests_mutex) @asulock @aculock +@c get_elem @ascuheap @acsmem [@asucorrupt @acucorrupt] +@c realloc @ascuheap @acsmem +@c calloc @ascuheap @acsmem +@c aio_create_helper_thread @asulock @ascuheap @aculock @acsmem +@c pthread_attr_init ok +@c pthread_attr_setdetachstate ok +@c pthread_get_minstack ok +@c pthread_attr_setstacksize ok +@c sigfillset ok +@c memset ok +@c sigdelset ok +@c SYSCALL rt_sigprocmask ok +@c pthread_create @asulock @ascuheap @aculock @acsmem +@c lll_lock (default_pthread_attr_lock) @asulock @aculock +@c alloca/malloc @ascuheap @acsmem +@c lll_unlock @aculock +@c allocate_stack @asulock @ascuheap @aculock @acsmem +@c getpagesize dup +@c lll_lock (default_pthread_attr_lock) @asulock @aculock +@c lll_unlock @aculock +@c _dl_allocate_tls @ascuheap @acsmem +@c _dl_allocate_tls_storage @ascuheap @acsmem +@c memalign @ascuheap @acsmem +@c memset ok +@c allocate_dtv dup +@c free @ascuheap @acsmem +@c allocate_dtv @ascuheap @acsmem +@c calloc @ascuheap @acsmem +@c INSTALL_DTV ok +@c list_add dup +@c get_cached_stack +@c lll_lock (stack_cache_lock) @asulock @aculock +@c list_for_each ok +@c list_entry dup +@c FREE_P dup +@c stack_list_del dup +@c stack_list_add dup +@c lll_unlock @aculock +@c _dl_allocate_tls_init ok +@c GET_DTV ok +@c mmap ok +@c atomic_increment_val ok +@c munmap ok +@c change_stack_perm ok +@c mprotect ok +@c mprotect ok +@c stack_list_del dup +@c _dl_deallocate_tls dup +@c munmap ok +@c THREAD_COPY_STACK_GUARD ok +@c THREAD_COPY_POINTER_GUARD ok +@c atomic_exchange_acq ok +@c lll_futex_wake ok +@c deallocate_stack @asulock @ascuheap @aculock @acsmem +@c lll_lock (state_cache_lock) @asulock @aculock +@c stack_list_del ok +@c atomic_write_barrier ok +@c 
list_del ok +@c atomic_write_barrier ok +@c queue_stack @ascuheap @acsmem +@c stack_list_add ok +@c atomic_write_barrier ok +@c list_add ok +@c atomic_write_barrier ok +@c free_stacks @ascuheap @acsmem +@c list_for_each_prev_safe ok +@c list_entry ok +@c FREE_P ok +@c stack_list_del dup +@c _dl_deallocate_tls dup +@c munmap ok +@c _dl_deallocate_tls @ascuheap @acsmem +@c free @ascuheap @acsmem +@c lll_unlock @aculock +@c create_thread @asulock @ascuheap @aculock @acsmem +@c td_eventword +@c td_eventmask +@c do_clone @asulock @ascuheap @aculock @acsmem +@c PREPARE_CREATE ok +@c lll_lock (pd->lock) @asulock @aculock +@c atomic_increment ok +@c clone ok +@c atomic_decrement ok +@c atomic_exchange_acq ok +@c lll_futex_wake ok +@c deallocate_stack dup +@c sched_setaffinity ok +@c tgkill ok +@c sched_setscheduler ok +@c atomic_compare_and_exchange_bool_acq ok +@c nptl_create_event ok +@c lll_unlock (pd->lock) @aculock +@c free @ascuheap @acsmem +@c pthread_attr_destroy ok (cpuset won't be set, so free isn't called) +@c add_request_to_runlist ok +@c pthread_cond_signal ok +@c aio_free_request ok +@c pthread_mutex_unlock @aculock + +@c (in the new thread, initiated with clone) +@c start_thread ok +@c HP_TIMING_NOW ok +@c ctype_init @mtslocale +@c atomic_exchange_acq ok +@c lll_futex_wake ok +@c sigemptyset ok +@c sigaddset ok +@c setjmp ok +@c CANCEL_ASYNC -> pthread_enable_asynccancel ok +@c do_cancel ok +@c pthread_unwind ok +@c Unwind_ForcedUnwind or longjmp ok [@ascuheap @acsmem?] 
+@c lll_lock @asulock @aculock +@c lll_unlock @asulock @aculock +@c CANCEL_RESET -> pthread_disable_asynccancel ok +@c lll_futex_wait ok +@c ->start_routine ok ----- +@c call_tls_dtors @asulock @ascuheap @aculock @acsmem +@c user-supplied dtor +@c rtld_lock_lock_recursive (dl_load_lock) @asulock @aculock +@c rtld_lock_unlock_recursive @aculock +@c free @ascuheap @acsmem +@c nptl_deallocate_tsd @ascuheap @acsmem +@c tsd user-supplied dtors ok +@c free @ascuheap @acsmem +@c libc_thread_freeres +@c libc_thread_subfreeres ok +@c atomic_decrement_and_test ok +@c td_eventword ok +@c td_eventmask ok +@c atomic_compare_exchange_bool_acq ok +@c nptl_death_event ok +@c lll_robust_dead ok +@c getpagesize ok +@c madvise ok +@c free_tcb @asulock @ascuheap @aculock @acsmem +@c free @ascuheap @acsmem +@c deallocate_stack @asulock @ascuheap @aculock @acsmem +@c lll_futex_wait ok +@c exit_thread_inline ok +@c syscall(exit) ok + +This function initiates an asynchronous read operation. It +immediately returns after the operation was enqueued or when an +error was encountered. + +The first @code{aiocbp->aio_nbytes} bytes of the file for which +@code{aiocbp->aio_fildes} is a descriptor are written to the buffer +starting at @code{aiocbp->aio_buf}. Reading starts at the absolute +position @code{aiocbp->aio_offset} in the file. + +If prioritized I/O is supported by the platform the +@code{aiocbp->aio_reqprio} value is used to adjust the priority before +the request is actually enqueued. + +The calling process is notified about the termination of the read +request according to the @code{aiocbp->aio_sigevent} value. + +When @code{aio_read} returns, the return value is zero if no error +occurred that can be found before the process is enqueued. If such an +early error is found, the function returns @math{-1} and sets +@code{errno} to one of the following values: + +@table @code +@item EAGAIN +The request was not enqueued due to (temporarily) exceeded resource +limitations. 
+@item ENOSYS +The @code{aio_read} function is not implemented. +@item EBADF +The @code{aiocbp->aio_fildes} descriptor is not valid. This condition +need not be recognized before enqueueing the request and so this error +might also be signaled asynchronously. +@item EINVAL +The @code{aiocbp->aio_offset} or @code{aiocbp->aio_reqprio} value is +invalid. This condition need not be recognized before enqueueing the +request and so this error might also be signaled asynchronously. +@end table + +If @code{aio_read} returns zero, the current status of the request +can be queried using the @code{aio_error} and @code{aio_return} functions. +As long as the value returned by @code{aio_error} is @code{EINPROGRESS} +the operation has not yet completed. If @code{aio_error} returns zero, +the operation successfully terminated, otherwise the value is to be +interpreted as an error code. If the function terminated, the result of +the operation can be obtained using a call to @code{aio_return}. The +returned value is the same as an equivalent call to @code{read} would +have returned. Possible error codes returned by @code{aio_error} are: + +@table @code +@item EBADF +The @code{aiocbp->aio_fildes} descriptor is not valid. +@item ECANCELED +The operation was canceled before the operation was finished +(@pxref{Cancel AIO Operations}). +@item EINVAL +The @code{aiocbp->aio_offset} value is invalid. +@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{aio_read64} since the LFS interface transparently +replaces the normal implementation. +@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int aio_read64 (struct aiocb64 *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +This function is similar to the @code{aio_read} function. The only +difference is that on @w{32 bit} machines, the file descriptor should +be opened in the large file mode.
Internally, @code{aio_read64} uses +functionality equivalent to @code{lseek64} (@pxref{File Position +Primitive}) to position the file descriptor correctly for the reading, +as opposed to the @code{lseek} functionality used in @code{aio_read}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is available under the name @code{aio_read} and so transparently +replaces the interface for small files on 32 bit machines. +@end deftypefun + +To write data asynchronously to a file, there exists an equivalent pair +of functions with a very similar interface. + +@comment aio.h +@comment POSIX.1b +@deftypefun int aio_write (struct aiocb *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +This function initiates an asynchronous write operation. The function +call immediately returns after the operation was enqueued or if before +this happens an error was encountered. + +The first @code{aiocbp->aio_nbytes} bytes from the buffer starting at +@code{aiocbp->aio_buf} are written to the file for which +@code{aiocbp->aio_fildes} is a descriptor, starting at the absolute +position @code{aiocbp->aio_offset} in the file. + +If prioritized I/O is supported by the platform, the +@code{aiocbp->aio_reqprio} value is used to adjust the priority before +the request is actually enqueued. + +The calling process is notified about the termination of the write +request according to the @code{aiocbp->aio_sigevent} value. + +When @code{aio_write} returns, the return value is zero if no error +occurred that can be found before the process is enqueued. If such an +early error is found the function returns @math{-1} and sets +@code{errno} to one of the following values. + +@table @code +@item EAGAIN +The request was not enqueued due to (temporarily) exceeded resource +limitations. +@item ENOSYS +The @code{aio_write} function is not implemented. +@item EBADF +The @code{aiocbp->aio_fildes} descriptor is not valid.
This condition +may not be recognized before enqueueing the request, and so this error +might also be signaled asynchronously. +@item EINVAL +The @code{aiocbp->aio_offset} or @code{aiocbp->aio_reqprio} value is +invalid. This condition may not be recognized before enqueueing the +request and so this error might also be signaled asynchronously. +@end table + +In the case @code{aio_write} returns zero, the current status of the +request can be queried using the @code{aio_error} and @code{aio_return} +functions. As long as the value returned by @code{aio_error} is +@code{EINPROGRESS} the operation has not yet completed. If +@code{aio_error} returns zero, the operation successfully terminated, +otherwise the value is to be interpreted as an error code. If the +function terminated, the result of the operation can be obtained using a call +to @code{aio_return}. The returned value is the same as an equivalent +call to @code{write} would have returned. Possible error codes returned +by @code{aio_error} are: + +@table @code +@item EBADF +The @code{aiocbp->aio_fildes} descriptor is not valid. +@item ECANCELED +The operation was canceled before the operation was finished +(@pxref{Cancel AIO Operations}). +@item EINVAL +The @code{aiocbp->aio_offset} value is invalid. +@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is in fact @code{aio_write64} since the LFS interface transparently +replaces the normal implementation. +@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int aio_write64 (struct aiocb64 *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +This function is similar to the @code{aio_write} function. The only +difference is that on @w{32 bit} machines the file descriptor should +be opened in the large file mode.
Internally @code{aio_write64} uses +functionality equivalent to @code{lseek64} (@pxref{File Position +Primitive}) to position the file descriptor correctly for the writing, +as opposed to the @code{lseek} functionality used in @code{aio_write}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is available under the name @code{aio_write} and so transparently +replaces the interface for small files on 32 bit machines. +@end deftypefun + +Besides these functions with the more or less traditional interface, +POSIX.1b also defines a function which can initiate more than one +operation at a time, and which can handle freely mixed read and write +operations. It is therefore similar to a combination of @code{readv} and +@code{writev}. + +@comment aio.h +@comment POSIX.1b +@deftypefun int lio_listio (int @var{mode}, struct aiocb *const @var{list}[], int @var{nent}, struct sigevent *@var{sig}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +@c Call lio_listio_internal, that takes the aio_requests_mutex lock and +@c enqueues each request. Then, it waits for notification or prepares +@c for it before releasing the lock. Even though it performs memory +@c allocation and locking of its own, it doesn't add any classes of +@c safety issues that aren't already covered by aio_enqueue_request. +The @code{lio_listio} function can be used to enqueue an arbitrary +number of read and write requests at one time. The requests can all be +meant for the same file, all for different files or every solution in +between. + +@code{lio_listio} gets the @var{nent} requests from the array pointed to +by @var{list}. The operation to be performed is determined by the +@code{aio_lio_opcode} member in each element of @var{list}. 
If this +field is @code{LIO_READ} a read operation is enqueued, similar to a call +of @code{aio_read} for this element of the array (except that the way +the termination is signalled is different, as we will see below). If +the @code{aio_lio_opcode} member is @code{LIO_WRITE} a write operation +is enqueued. Otherwise the @code{aio_lio_opcode} must be @code{LIO_NOP} +in which case this element of @var{list} is simply ignored. This +``operation'' is useful in situations where one has a fixed array of +@code{struct aiocb} elements from which only a few need to be handled at +a time. Another situation is where the @code{lio_listio} call was +canceled before all requests are processed (@pxref{Cancel AIO +Operations}) and the remaining requests have to be reissued. + +The other members of each element of the array pointed to by +@code{list} must have values suitable for the operation as described in +the documentation for @code{aio_read} and @code{aio_write} above. + +The @var{mode} argument determines how @code{lio_listio} behaves after +having enqueued all the requests. If @var{mode} is @code{LIO_WAIT} it +waits until all requests terminated. Otherwise @var{mode} must be +@code{LIO_NOWAIT} and in this case the function returns immediately after +having enqueued all the requests. In this case the caller gets a +notification of the termination of all requests according to the +@var{sig} parameter. If @var{sig} is @code{NULL} no notification is +sent. Otherwise a signal is sent or a thread is started, just as +described in the description for @code{aio_read} or @code{aio_write}. + +If @var{mode} is @code{LIO_WAIT}, the return value of @code{lio_listio} +is @math{0} when all requests completed successfully. Otherwise the +function returns @math{-1} and @code{errno} is set accordingly. To find +out which request or requests failed one has to use the @code{aio_error} +function on all the elements of the array @var{list}. 
+ +In case @var{mode} is @code{LIO_NOWAIT}, the function returns @math{0} if +all requests were enqueued correctly. The current state of the requests +can be found using @code{aio_error} and @code{aio_return} as described +above. If @code{lio_listio} returns @math{-1} in this mode, the +global variable @code{errno} is set accordingly. If a request did not +yet terminate, a call to @code{aio_error} returns @code{EINPROGRESS}. If +the value is different, the request is finished and the error value (or +@math{0}) is returned and the result of the operation can be retrieved +using @code{aio_return}. + +Possible values for @code{errno} are: + +@table @code +@item EAGAIN +The resources necessary to queue all the requests are not available at +the moment. The error status for each element of @var{list} must be +checked to determine which request failed. + +Another reason could be that the system wide limit of AIO requests is +exceeded. This cannot be the case for the implementation on @gnusystems{} +since no arbitrary limits exist. +@item EINVAL +The @var{mode} parameter is invalid or @var{nent} is larger than +@code{AIO_LISTIO_MAX}. +@item EIO +One or more of the request's I/O operations failed. The error status of +each request should be checked to determine which one failed. +@item ENOSYS +The @code{lio_listio} function is not supported. +@end table + +If the @var{mode} parameter is @code{LIO_NOWAIT} and the caller cancels +a request, the error status for this request returned by +@code{aio_error} is @code{ECANCELED}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is in fact @code{lio_listio64} since the LFS interface +transparently replaces the normal implementation. 
+@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int lio_listio64 (int @var{mode}, struct aiocb64 *const @var{list}[], int @var{nent}, struct sigevent *@var{sig}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +This function is similar to the @code{lio_listio} function. The only +difference is that on @w{32 bit} machines, the file descriptor should +be opened in the large file mode. Internally, @code{lio_listio64} uses +functionality equivalent to @code{lseek64} (@pxref{File Position +Primitive}) to position the file descriptor correctly for the reading or +writing, as opposed to the @code{lseek} functionality used in +@code{lio_listio}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is available under the name @code{lio_listio} and so +transparently replaces the interface for small files on 32 bit +machines. +@end deftypefun + +@node Status of AIO Operations +@subsection Getting the Status of AIO Operations + +As already described in the documentation of the functions in the last +section, it must be possible to get information about the status of an I/O +request. When the operation is performed truly asynchronously (as with +@code{aio_read} and @code{aio_write} and with @code{lio_listio} when the +mode is @code{LIO_NOWAIT}), one sometimes needs to know whether a +specific request already terminated and if so, what the result was. +The following two functions allow you to get this kind of information. + +@comment aio.h +@comment POSIX.1b +@deftypefun int aio_error (const struct aiocb *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function determines the error state of the request described by the +@code{struct aiocb} variable pointed to by @var{aiocbp}. If the +request has not yet terminated the value returned is always +@code{EINPROGRESS}. 
Once the request has terminated the value +@code{aio_error} returns is either @math{0} if the request completed +successfully or it returns the value which would be stored in the +@code{errno} variable if the request would have been done using +@code{read}, @code{write}, or @code{fsync}. + +The function can return @code{ENOSYS} if it is not implemented. It +could also return @code{EINVAL} if the @var{aiocbp} parameter does not +refer to an asynchronous operation whose return status is not yet known. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{aio_error64} since the LFS interface +transparently replaces the normal implementation. +@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int aio_error64 (const struct aiocb64 *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{aio_error} with the only difference +that the argument is a reference to a variable of type @code{struct +aiocb64}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{aio_error} and so +transparently replaces the interface for small files on 32 bit +machines. +@end deftypefun + +@comment aio.h +@comment POSIX.1b +@deftypefun ssize_t aio_return (struct aiocb *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function can be used to retrieve the return status of the operation +carried out by the request described in the variable pointed to by +@var{aiocbp}. As long as the error status of this request as returned +by @code{aio_error} is @code{EINPROGRESS} the return value of this function is +undefined. + +Once the request is finished this function can be used exactly once to +retrieve the return value. Following calls might lead to undefined +behavior. The return value itself is the value which would have been +returned by the @code{read}, @code{write}, or @code{fsync} call. 
+ +The function can return @code{ENOSYS} if it is not implemented. It +could also return @code{EINVAL} if the @var{aiocbp} parameter does not +refer to an asynchronous operation whose return status is not yet known. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{aio_return64} since the LFS interface +transparently replaces the normal implementation. +@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun ssize_t aio_return64 (struct aiocb64 *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{aio_return} with the only difference +that the argument is a reference to a variable of type @code{struct +aiocb64}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{aio_return} and so +transparently replaces the interface for small files on 32 bit +machines. +@end deftypefun + +@node Synchronizing AIO Operations +@subsection Getting into a Consistent State + +When dealing with asynchronous operations it is sometimes necessary to +get into a consistent state. This would mean for AIO that one wants to +know whether a certain request or a group of requests were processed. +This could be done by waiting for the notification sent by the system +after the operation terminated, but this sometimes would mean wasting +resources (mainly computation time). Instead POSIX.1b defines two +functions which will help with most kinds of consistency. + +The @code{aio_fsync} and @code{aio_fsync64} functions are only available +if the symbol @code{_POSIX_SYNCHRONIZED_IO} is defined in @file{unistd.h}. + +@cindex synchronizing +@comment aio.h +@comment POSIX.1b +@deftypefun int aio_fsync (int @var{op}, struct aiocb *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +@c After fcntl to check that the FD is open, it calls +@c aio_enqueue_request. 
+Calling this function forces all I/O operations queued at the +time of the function call operating on the file descriptor +@code{aiocbp->aio_fildes} into the synchronized I/O completion state +(@pxref{Synchronizing I/O}). The @code{aio_fsync} function returns +immediately but the notification through the method described in +@code{aiocbp->aio_sigevent} will happen only after all requests for this +file descriptor have terminated and the file is synchronized. This also +means that requests for this very same file descriptor which are queued +after the synchronization request are not affected. + +If @var{op} is @code{O_DSYNC} the synchronization happens as with a call +to @code{fdatasync}. Otherwise @var{op} should be @code{O_SYNC} and +the synchronization happens as with @code{fsync}. + +As long as the synchronization has not happened, a call to +@code{aio_error} with the reference to the object pointed to by +@var{aiocbp} returns @code{EINPROGRESS}. Once the synchronization is +done @code{aio_error} returns @math{0} if the synchronization was +successful. Otherwise the value returned is the value to which the +@code{fsync} or @code{fdatasync} function would have set the +@code{errno} variable. In this case nothing can be assumed about the +consistency of the data written to this file descriptor. + +The return value of this function is @math{0} if the request was +successfully enqueued. Otherwise the return value is @math{-1} and +@code{errno} is set to one of the following values: + +@table @code +@item EAGAIN +The request could not be enqueued due to temporary lack of resources. +@item EBADF +The file descriptor @code{@var{aiocbp}->aio_fildes} is not valid. +@item EINVAL +The implementation does not support I/O synchronization or the @var{op} +parameter is other than @code{O_DSYNC} and @code{O_SYNC}. +@item ENOSYS +This function is not implemented.
+@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{aio_fsync64} since the LFS interface +transparently replaces the normal implementation. +@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int aio_fsync64 (int @var{op}, struct aiocb64 *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +This function is similar to @code{aio_fsync} with the only difference +that the argument is a reference to a variable of type @code{struct +aiocb64}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{aio_fsync} and so +transparently replaces the interface for small files on 32 bit +machines. +@end deftypefun + +Another method of synchronization is to wait until one or more requests of a +specific set terminated. This could be achieved by the @code{aio_*} +functions to notify the initiating process about the termination but in +some situations this is not the ideal solution. In a program which +constantly updates clients somehow connected to the server it is not +always the best solution to go round robin since some connections might +be slow. On the other hand letting the @code{aio_*} functions notify the +caller might also be not the best solution since whenever the process +works on preparing data for a client it makes no sense to be +interrupted by a notification since the new client will not be handled +before the current client is served. For situations like this +@code{aio_suspend} should be used. + +@comment aio.h +@comment POSIX.1b +@deftypefun int aio_suspend (const struct aiocb *const @var{list}[], int @var{nent}, const struct timespec *@var{timeout}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c Take aio_requests_mutex, set up waitlist and requestlist, wait +@c for completion or timeout, and release the mutex. 
+When calling this function, the calling thread is suspended until at +least one of the requests pointed to by the @var{nent} elements of the +array @var{list} has completed. If any of the requests has already +completed at the time @code{aio_suspend} is called, the function returns +immediately. Whether a request has terminated or not is determined by +comparing the error status of the request with @code{EINPROGRESS}. If +an element of @var{list} is @code{NULL}, the entry is simply ignored. + +If no request has finished, the calling process is suspended. If +@var{timeout} is @code{NULL}, the process is not woken until a request +has finished. If @var{timeout} is not @code{NULL}, the process remains +suspended at most as long as specified in @var{timeout}. If the timeout expires first, +@code{aio_suspend} returns with an error. + +The return value of the function is @math{0} if one or more requests +from the @var{list} have terminated. Otherwise the function returns +@math{-1} and @code{errno} is set to one of the following values: + +@table @code +@item EAGAIN +None of the requests from the @var{list} completed in the time specified +by @var{timeout}. +@item EINTR +A signal interrupted the @code{aio_suspend} function. This signal might +also be sent by the AIO implementation while signalling the termination +of one of the requests. +@item ENOSYS +The @code{aio_suspend} function is not implemented. +@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is in fact @code{aio_suspend64} since the LFS interface +transparently replaces the normal implementation.
+@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int aio_suspend64 (const struct aiocb64 *const @var{list}[], int @var{nent}, const struct timespec *@var{timeout}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +This function is similar to @code{aio_suspend} with the only difference +that the elements of the array @var{list} are references to variables of type @code{struct +aiocb64}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} this +function is available under the name @code{aio_suspend} and so +transparently replaces the interface for small files on 32 bit +machines. +@end deftypefun + +@node Cancel AIO Operations +@subsection Cancellation of AIO Operations + +When one or more requests are asynchronously processed, it might be +useful in some situations to cancel a selected operation, e.g., if it +becomes obvious that the written data is no longer accurate and would +have to be overwritten soon. As an example, assume an application, which +writes data in files in a situation where new incoming data would have +to be written in a file which will be updated by an enqueued request. +The POSIX AIO implementation provides such a function, but this function +is not capable of forcing the cancellation of the request. It is up to the +implementation to decide whether it is possible to cancel the operation +or not. Therefore using this function is merely a hint. + +@comment aio.h +@comment POSIX.1b +@deftypefun int aio_cancel (int @var{fildes}, struct aiocb *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +@c After fcntl to check the fd is open, hold aio_requests_mutex, call +@c aio_find_req_fd, aio_remove_request, then aio_notify and +@c aio_free_request each request before releasing the lock. +@c aio_notify calls aio_notify_only and free, besides cond signal or +@c similar.
aio_notify_only calls pthread_attr_init, +@c pthread_attr_setdetachstate, malloc, pthread_create, +@c notify_func_wrapper, aio_sigqueue, getpid, raise. +@c notify_func_wrapper calls aio_start_notify_thread, free and then the +@c notifier function. +The @code{aio_cancel} function can be used to cancel one or more +outstanding requests. If the @var{aiocbp} parameter is @code{NULL}, the +function tries to cancel all of the outstanding requests which would process +the file descriptor @var{fildes} (i.e., whose @code{aio_fildes} member +is @var{fildes}). If @var{aiocbp} is not @code{NULL}, @code{aio_cancel} +attempts to cancel the specific request pointed to by @var{aiocbp}. + +For requests which were successfully canceled, the normal notification +about the termination of the request should take place. I.e., depending +on the @code{struct sigevent} object which controls this, nothing +happens, a signal is sent or a thread is started. If the request cannot +be canceled, it terminates the usual way after performing the operation. + +After a request is successfully canceled, a call to @code{aio_error} with +a reference to this request as the parameter will return +@code{ECANCELED} and a call to @code{aio_return} will return @math{-1}. +If the request wasn't canceled and is still running the error status is +still @code{EINPROGRESS}. + +The return value of the function is @code{AIO_CANCELED} if there were +requests which haven't terminated and which were successfully canceled. +If there are one or more requests left which couldn't be canceled, the +return value is @code{AIO_NOTCANCELED}. In this case @code{aio_error} +must be used to find out which of the, perhaps multiple, requests (if +@var{aiocbp} is @code{NULL}) weren't successfully canceled. If all +requests already terminated at the time @code{aio_cancel} is called the +return value is @code{AIO_ALLDONE}.
+ +If an error occurred during the execution of @code{aio_cancel} the +function returns @math{-1} and sets @code{errno} to one of the following +values. + +@table @code +@item EBADF +The file descriptor @var{fildes} is not valid. +@item ENOSYS +@code{aio_cancel} is not implemented. +@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is in fact @code{aio_cancel64} since the LFS interface +transparently replaces the normal implementation. +@end deftypefun + +@comment aio.h +@comment Unix98 +@deftypefun int aio_cancel64 (int @var{fildes}, struct aiocb64 *@var{aiocbp}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +This function is similar to @code{aio_cancel} with the only difference +that the argument is a reference to a variable of type @code{struct +aiocb64}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64}, this +function is available under the name @code{aio_cancel} and so +transparently replaces the interface for small files on 32 bit +machines. +@end deftypefun + +@node Configuration of AIO +@subsection How to optimize the AIO implementation + +The POSIX standard does not specify how the AIO functions are +implemented. They could be system calls, but it is also possible to +emulate them at userlevel. + +At the time of writing, the available implementation is a user-level +implementation which uses threads for handling the enqueued requests. +While this implementation requires making some decisions about +limitations, hard limitations are something best avoided +in @theglibc{}. Therefore, @theglibc{} provides a means +for tuning the AIO implementation according to the individual use. + +@comment aio.h +@comment GNU +@deftp {Data Type} {struct aioinit} +This data type is used to pass the configuration or tunable parameters +to the implementation. 
The program has to initialize the members of +this struct and pass it to the implementation using the @code{aio_init} +function. + +@table @code +@item int aio_threads +This member specifies the maximal number of threads which may be used +at any one time. +@item int aio_num +This number provides an estimate on the maximal number of simultaneously +enqueued requests. +@item int aio_locks +Unused. +@item int aio_usedba +Unused. +@item int aio_debug +Unused. +@item int aio_numusers +Unused. +@item int aio_reserved[2] +Unused. +@end table +@end deftp + +@comment aio.h +@comment GNU +@deftypefun void aio_init (const struct aioinit *@var{init}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c All changes to global objects are guarded by aio_requests_mutex. +This function must be called before any other AIO function. Calling it +is completely voluntary, as it is only meant to help the AIO +implementation perform better. + +Before calling @code{aio_init}, the members of a variable of +type @code{struct aioinit} must be initialized. Then a reference to +this variable is passed as the parameter to @code{aio_init} which itself +may or may not pay attention to the hints. + +The function has no return value and no error cases are defined. It is +an extension which follows a proposal from the SGI implementation in +@w{Irix 6}. It is not covered by POSIX.1b or Unix98. +@end deftypefun + +@node Control Operations +@section Control Operations on Files + +@cindex control operations on files +@cindex @code{fcntl} function +This section describes how you can perform various other operations on +file descriptors, such as inquiring about or setting flags describing +the status of the file descriptor, manipulating record locks, and the +like. All of these operations are performed by the function @code{fcntl}. + +The second argument to the @code{fcntl} function is a command that +specifies which operation to perform. 
The function and macros that name +various flags that are used with it are declared in the header file +@file{fcntl.h}. Many of these flags are also used by the @code{open} +function; see @ref{Opening and Closing Files}. +@pindex fcntl.h + +@comment fcntl.h +@comment POSIX.1 +@deftypefun int fcntl (int @var{filedes}, int @var{command}, @dots{}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{fcntl} function performs the operation specified by +@var{command} on the file descriptor @var{filedes}. Some commands +require additional arguments to be supplied. These additional arguments +and the return value and error conditions are given in the detailed +descriptions of the individual commands. + +Briefly, here is a list of what the various commands are. + +@vtable @code +@item F_DUPFD +Duplicate the file descriptor (return another file descriptor pointing +to the same open file). @xref{Duplicating Descriptors}. + +@item F_GETFD +Get flags associated with the file descriptor. @xref{Descriptor Flags}. + +@item F_SETFD +Set flags associated with the file descriptor. @xref{Descriptor Flags}. + +@item F_GETFL +Get flags associated with the open file. @xref{File Status Flags}. + +@item F_SETFL +Set flags associated with the open file. @xref{File Status Flags}. + +@item F_GETLK +Test a file lock. @xref{File Locks}. + +@item F_SETLK +Set or clear a file lock. @xref{File Locks}. + +@item F_SETLKW +Like @code{F_SETLK}, but wait for completion. @xref{File Locks}. + +@item F_OFD_GETLK +Test an open file description lock. @xref{Open File Description Locks}. +Specific to Linux. + +@item F_OFD_SETLK +Set or clear an open file description lock. @xref{Open File Description Locks}. +Specific to Linux. + +@item F_OFD_SETLKW +Like @code{F_OFD_SETLK}, but block until lock is acquired. +@xref{Open File Description Locks}. Specific to Linux. + +@item F_GETOWN +Get process or process group ID to receive @code{SIGIO} signals. +@xref{Interrupt Input}. 
+ +@item F_SETOWN +Set process or process group ID to receive @code{SIGIO} signals. +@xref{Interrupt Input}. +@end vtable + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{fcntl} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this calls to @code{fcntl} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop +@end deftypefun + + +@node Duplicating Descriptors +@section Duplicating Descriptors + +@cindex duplicating file descriptors +@cindex redirecting input and output + +You can @dfn{duplicate} a file descriptor, or allocate another file +descriptor that refers to the same open file as the original. Duplicate +descriptors share one file position and one set of file status flags +(@pxref{File Status Flags}), but each has its own set of file descriptor +flags (@pxref{Descriptor Flags}). + +The major use of duplicating a file descriptor is to implement +@dfn{redirection} of input or output: that is, to change the +file or pipe that a particular file descriptor corresponds to. + +You can perform this operation using the @code{fcntl} function with the +@code{F_DUPFD} command, but there are also convenient functions +@code{dup} and @code{dup2} for duplicating descriptors. + +@pindex unistd.h +@pindex fcntl.h +The @code{fcntl} function and flags are declared in @file{fcntl.h}, +while prototypes for @code{dup} and @code{dup2} are in the header file +@file{unistd.h}. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int dup (int @var{old}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function copies descriptor @var{old} to the first available +descriptor number (the first number not currently open). It is +equivalent to @code{fcntl (@var{old}, F_DUPFD, 0)}. 
+@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int dup2 (int @var{old}, int @var{new}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function copies the descriptor @var{old} to descriptor number +@var{new}. + +If @var{old} is an invalid descriptor, then @code{dup2} does nothing; it +does not close @var{new}. Otherwise, the new duplicate of @var{old} +replaces any previous meaning of descriptor @var{new}, as if @var{new} +were closed first. + +If @var{old} and @var{new} are different numbers, and @var{old} is a +valid descriptor number, then @code{dup2} is equivalent to: + +@smallexample +close (@var{new}); +fcntl (@var{old}, F_DUPFD, @var{new}) +@end smallexample + +However, @code{dup2} does this atomically; there is no instant in the +middle of calling @code{dup2} at which @var{new} is closed and not yet a +duplicate of @var{old}. +@end deftypefun + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_DUPFD +This macro is used as the @var{command} argument to @code{fcntl}, to +copy the file descriptor given as the first argument. + +The form of the call in this case is: + +@smallexample +fcntl (@var{old}, F_DUPFD, @var{next-filedes}) +@end smallexample + +The @var{next-filedes} argument is of type @code{int} and specifies that +the file descriptor returned should be the next available one greater +than or equal to this value. + +The return value from @code{fcntl} with this command is normally the value +of the new file descriptor. A return value of @math{-1} indicates an +error. The following @code{errno} error conditions are defined for +this command: + +@table @code +@item EBADF +The @var{old} argument is invalid. + +@item EINVAL +The @var{next-filedes} argument is invalid. + +@item EMFILE +There are no more file descriptors available---your program is already +using the maximum. 
In BSD and GNU, the maximum is controlled by a +resource limit that can be changed; @pxref{Limits on Resources}, for +more information about the @code{RLIMIT_NOFILE} limit. +@end table + +@code{ENFILE} is not a possible error code for @code{dup2} because +@code{dup2} does not create a new opening of a file; duplicate +descriptors do not count toward the limit which @code{ENFILE} +indicates. @code{EMFILE} is possible because it refers to the limit on +distinct descriptor numbers in use in one process. +@end deftypevr + +Here is an example showing how to use @code{dup2} to do redirection. +Typically, redirection of the standard streams (like @code{stdin}) is +done by a shell or shell-like program before calling one of the +@code{exec} functions (@pxref{Executing a File}) to execute a new +program in a child process. When the new program is executed, it +creates and initializes the standard streams to point to the +corresponding file descriptors, before its @code{main} function is +invoked. + +So, to redirect standard input to a file, the shell could do something +like: + +@smallexample +pid = fork (); +if (pid == 0) + @{ + char *filename; + char *program; + int file; + @dots{} + file = TEMP_FAILURE_RETRY (open (filename, O_RDONLY)); + dup2 (file, STDIN_FILENO); + TEMP_FAILURE_RETRY (close (file)); + execv (program, NULL); + @} +@end smallexample + +There is also a more detailed example showing how to implement redirection +in the context of a pipeline of processes in @ref{Launching Jobs}. + + +@node Descriptor Flags +@section File Descriptor Flags +@cindex file descriptor flags + +@dfn{File descriptor flags} are miscellaneous attributes of a file +descriptor. These flags are associated with particular file +descriptors, so that if you have created duplicate file descriptors +from a single opening of a file, each descriptor has its own set of flags. 
+ +Currently there is just one file descriptor flag: @code{FD_CLOEXEC}, +which causes the descriptor to be closed if you use any of the +@code{exec@dots{}} functions (@pxref{Executing a File}). + +The symbols in this section are defined in the header file +@file{fcntl.h}. +@pindex fcntl.h + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_GETFD +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should return the file descriptor flags associated +with the @var{filedes} argument. + +The normal return value from @code{fcntl} with this command is a +nonnegative number which can be interpreted as the bitwise OR of the +individual flags (except that currently there is only one flag to use). + +In case of an error, @code{fcntl} returns @math{-1}. The following +@code{errno} error conditions are defined for this command: + +@table @code +@item EBADF +The @var{filedes} argument is invalid. +@end table +@end deftypevr + + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_SETFD +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should set the file descriptor flags associated with the +@var{filedes} argument. This requires a third @code{int} argument to +specify the new flags, so the form of the call is: + +@smallexample +fcntl (@var{filedes}, F_SETFD, @var{new-flags}) +@end smallexample + +The normal return value from @code{fcntl} with this command is an +unspecified value other than @math{-1}, which indicates an error. +The flags and error conditions are the same as for the @code{F_GETFD} +command. +@end deftypevr + +The following macro is defined for use as a file descriptor flag with +the @code{fcntl} function. The value is an integer constant usable +as a bit mask value. 
+ +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int FD_CLOEXEC +@cindex close-on-exec (file descriptor flag) +This flag specifies that the file descriptor should be closed when +an @code{exec} function is invoked; see @ref{Executing a File}. When +a file descriptor is allocated (as with @code{open} or @code{dup}), +this bit is initially cleared on the new file descriptor, meaning that +descriptor will survive into the new program after @code{exec}. +@end deftypevr + +If you want to modify the file descriptor flags, you should get the +current flags with @code{F_GETFD} and modify the value. Don't assume +that the flags listed here are the only ones that are implemented; your +program may be run years from now and more flags may exist then. For +example, here is a function to set or clear the flag @code{FD_CLOEXEC} +without altering any other flags: + +@smallexample +/* @r{Set the @code{FD_CLOEXEC} flag of @var{desc} if @var{value} is nonzero,} + @r{or clear the flag if @var{value} is 0.} + @r{Return 0 on success, or -1 on error with @code{errno} set.} */ + +int +set_cloexec_flag (int desc, int value) +@{ + int oldflags = fcntl (desc, F_GETFD, 0); + /* @r{If reading the flags failed, return error indication now.} */ + if (oldflags < 0) + return oldflags; + /* @r{Set just the flag we want to set.} */ + if (value != 0) + oldflags |= FD_CLOEXEC; + else + oldflags &= ~FD_CLOEXEC; + /* @r{Store modified flag word in the descriptor.} */ + return fcntl (desc, F_SETFD, oldflags); +@} +@end smallexample + +@node File Status Flags +@section File Status Flags +@cindex file status flags + +@dfn{File status flags} are used to specify attributes of the opening of a +file. Unlike the file descriptor flags discussed in @ref{Descriptor +Flags}, the file status flags are shared by duplicated file descriptors +resulting from a single opening of the file. The file status flags are +specified with the @var{flags} argument to @code{open}; +@pxref{Opening and Closing Files}. 
+ +File status flags fall into three categories, which are described in the +following sections. + +@itemize @bullet +@item +@ref{Access Modes}, specify what type of access is allowed to the +file: reading, writing, or both. They are set by @code{open} and are +returned by @code{fcntl}, but cannot be changed. + +@item +@ref{Open-time Flags}, control details of what @code{open} will do. +These flags are not preserved after the @code{open} call. + +@item +@ref{Operating Modes}, affect how operations such as @code{read} and +@code{write} are done. They are set by @code{open}, and can be fetched or +changed with @code{fcntl}. +@end itemize + +The symbols in this section are defined in the header file +@file{fcntl.h}. +@pindex fcntl.h + +@menu +* Access Modes:: Whether the descriptor can read or write. +* Open-time Flags:: Details of @code{open}. +* Operating Modes:: Special modes to control I/O operations. +* Getting File Status Flags:: Fetching and changing these flags. +@end menu + +@node Access Modes +@subsection File Access Modes + +The file access modes allow a file descriptor to be used for reading, +writing, or both. (On @gnuhurdsystems{}, they can also allow none of these, +and allow execution of the file as a program.) The access modes are chosen +when the file is opened, and never change. + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_RDONLY +Open the file for read access. +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_WRONLY +Open the file for write access. +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_RDWR +Open the file for both reading and writing. +@end deftypevr + +On @gnuhurdsystems{} (and not on other systems), @code{O_RDONLY} and +@code{O_WRONLY} are independent bits that can be bitwise-ORed together, +and it is valid for either bit to be set or clear. This means that +@code{O_RDWR} is the same as @code{O_RDONLY|O_WRONLY}. 
A file access +mode of zero is permissible; it allows no operations that do input or +output to the file, but does allow other operations such as +@code{fchmod}. On @gnuhurdsystems{}, since ``read-only'' or ``write-only'' +is a misnomer, @file{fcntl.h} defines additional names for the file +access modes. These names are preferred when writing GNU-specific code. +But most programs will want to be portable to other POSIX.1 systems and +should use the POSIX.1 names above instead. + +@comment fcntl.h (optional) +@comment GNU +@deftypevr Macro int O_READ +Open the file for reading. Same as @code{O_RDONLY}; only defined on GNU. +@end deftypevr + +@comment fcntl.h (optional) +@comment GNU +@deftypevr Macro int O_WRITE +Open the file for writing. Same as @code{O_WRONLY}; only defined on GNU. +@end deftypevr + +@comment fcntl.h (optional) +@comment GNU +@deftypevr Macro int O_EXEC +Open the file for executing. Only defined on GNU. +@end deftypevr + +To determine the file access mode with @code{fcntl}, you must extract +the access mode bits from the retrieved file status flags. On +@gnuhurdsystems{}, +you can just test the @code{O_READ} and @code{O_WRITE} bits in +the flags word. But in other POSIX.1 systems, reading and writing +access modes are not stored as distinct bit flags. The portable way to +extract the file access mode bits is with @code{O_ACCMODE}. + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_ACCMODE +This macro stands for a mask that can be bitwise-ANDed with the file +status flag value to produce a value representing the file access mode. +The mode will be @code{O_RDONLY}, @code{O_WRONLY}, or @code{O_RDWR}. +(On @gnuhurdsystems{} it could also be zero, and it never includes the +@code{O_EXEC} bit.) +@end deftypevr + +@node Open-time Flags +@subsection Open-time Flags + +The open-time flags specify options affecting how @code{open} will behave. +These options are not preserved once the file is open. 
The exception to +this is @code{O_NONBLOCK}, which is also an I/O operating mode and so it +@emph{is} saved. @xref{Opening and Closing Files}, for how to call +@code{open}. + +There are two sorts of options specified by open-time flags. + +@itemize @bullet +@item +@dfn{File name translation flags} affect how @code{open} looks up the +file name to locate the file, and whether the file can be created. +@cindex file name translation flags +@cindex flags, file name translation + +@item +@dfn{Open-time action flags} specify extra operations that @code{open} will +perform on the file once it is open. +@cindex open-time action flags +@cindex flags, open-time action +@end itemize + +Here are the file name translation flags. + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_CREAT +If set, the file will be created if it doesn't already exist. +@c !!! mode arg, umask +@cindex create on open (file status flag) +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_EXCL +If both @code{O_CREAT} and @code{O_EXCL} are set, then @code{open} fails +if the specified file already exists. This is guaranteed to never +clobber an existing file. +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_NONBLOCK +@cindex non-blocking open +This prevents @code{open} from blocking for a ``long time'' to open the +file. This is only meaningful for some kinds of files, usually devices +such as serial ports; when it is not meaningful, it is harmless and +ignored. Often, opening a port to a modem blocks until the modem reports +carrier detection; if @code{O_NONBLOCK} is specified, @code{open} will +return immediately without a carrier. + +Note that the @code{O_NONBLOCK} flag is overloaded as both an I/O operating +mode and a file name translation flag. This means that specifying +@code{O_NONBLOCK} in @code{open} also sets nonblocking I/O mode; +@pxref{Operating Modes}. 
To open the file without blocking but do normal +I/O that blocks, you must call @code{open} with @code{O_NONBLOCK} set and +then call @code{fcntl} to turn the bit off. +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_NOCTTY +If the named file is a terminal device, don't make it the controlling +terminal for the process. @xref{Job Control}, for information about +what it means to be the controlling terminal. + +On @gnuhurdsystems{} and 4.4 BSD, opening a file never makes it the +controlling terminal and @code{O_NOCTTY} is zero. However, @gnulinuxsystems{} +and some other systems use a nonzero value for @code{O_NOCTTY} and set the +controlling terminal when you open a file that is a terminal device; so +to be portable, use @code{O_NOCTTY} when it is important to avoid this. +@cindex controlling terminal, setting +@end deftypevr + +The following three file name translation flags exist only on +@gnuhurdsystems{}. + +@comment fcntl.h (optional) +@comment GNU +@deftypevr Macro int O_IGNORE_CTTY +Do not recognize the named file as the controlling terminal, even if it +refers to the process's existing controlling terminal device. Operations +on the new file descriptor will never induce job control signals. +@xref{Job Control}. +@end deftypevr + +@comment fcntl.h (optional) +@comment GNU +@deftypevr Macro int O_NOLINK +If the named file is a symbolic link, open the link itself instead of +the file it refers to. (@code{fstat} on the new file descriptor will +return the information returned by @code{lstat} on the link's name.) +@cindex symbolic link, opening +@end deftypevr + +@comment fcntl.h (optional) +@comment GNU +@deftypevr Macro int O_NOTRANS +If the named file is specially translated, do not invoke the translator. +Open the bare file the translator itself sees. +@end deftypevr + + +The open-time action flags tell @code{open} to do additional operations +which are not really related to opening the file. 
The reason to do them +as part of @code{open} instead of in separate calls is that @code{open} +can do them @i{atomically}. + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_TRUNC +Truncate the file to zero length. This option is only useful for +regular files, not special files such as directories or FIFOs. POSIX.1 +requires that you open the file for writing to use @code{O_TRUNC}. In +BSD and GNU you must have permission to write the file to truncate it, +but you need not open for write access. + +This is the only open-time action flag specified by POSIX.1. There is +no good reason for truncation to be done by @code{open}, instead of by +calling @code{ftruncate} afterwards. The @code{O_TRUNC} flag existed in +Unix before @code{ftruncate} was invented, and is retained for backward +compatibility. +@end deftypevr + +The remaining open-time action flags are BSD extensions. They exist only +on some systems. On other systems, these macros are not defined. + +@comment fcntl.h (optional) +@comment BSD +@deftypevr Macro int O_SHLOCK +Acquire a shared lock on the file, as with @code{flock}. +@xref{File Locks}. + +If @code{O_CREAT} is specified, the locking is done atomically when +creating the file. You are guaranteed that no other process will get +the lock on the new file first. +@end deftypevr + +@comment fcntl.h (optional) +@comment BSD +@deftypevr Macro int O_EXLOCK +Acquire an exclusive lock on the file, as with @code{flock}. +@xref{File Locks}. This is atomic like @code{O_SHLOCK}. +@end deftypevr + +@node Operating Modes +@subsection I/O Operating Modes + +The operating modes affect how input and output operations using a file +descriptor work. These flags are set by @code{open} and can be fetched +and changed with @code{fcntl}. + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_APPEND +The bit that enables append mode for the file. 
If set, then all +@code{write} operations write the data at the end of the file, extending +it, regardless of the current file position. This is the only reliable +way to append to a file. In append mode, you are guaranteed that the +data you write will always go to the current end of the file, regardless +of other processes writing to the file. Conversely, if you simply set +the file position to the end of file and write, then another process can +extend the file after you set the file position but before you write, +resulting in your data appearing someplace before the real end of file. +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int O_NONBLOCK +The bit that enables nonblocking mode for the file. If this bit is set, +@code{read} requests on the file can return immediately with a failure +status if there is no input immediately available, instead of blocking. +Likewise, @code{write} requests can also return immediately with a +failure status if the output can't be written immediately. + +Note that the @code{O_NONBLOCK} flag is overloaded as both an I/O +operating mode and a file name translation flag; @pxref{Open-time Flags}. +@end deftypevr + +@comment fcntl.h +@comment BSD +@deftypevr Macro int O_NDELAY +This is an obsolete name for @code{O_NONBLOCK}, provided for +compatibility with BSD. It is not defined by the POSIX.1 standard. +@end deftypevr + +The remaining operating modes are BSD and GNU extensions. They exist only +on some systems. On other systems, these macros are not defined. + +@comment fcntl.h +@comment BSD +@deftypevr Macro int O_ASYNC +The bit that enables asynchronous input mode. If set, then @code{SIGIO} +signals will be generated when input is available. @xref{Interrupt Input}. + +Asynchronous input mode is a BSD feature. +@end deftypevr + +@comment fcntl.h +@comment BSD +@deftypevr Macro int O_FSYNC +The bit that enables synchronous writing for the file. 
If set, each +@code{write} call will make sure the data is reliably stored on disk before +returning. @c !!! xref fsync + +Synchronous writing is a BSD feature. +@end deftypevr + +@comment fcntl.h +@comment BSD +@deftypevr Macro int O_SYNC +This is another name for @code{O_FSYNC}. They have the same value. +@end deftypevr + +@comment fcntl.h +@comment GNU +@deftypevr Macro int O_NOATIME +If this bit is set, @code{read} will not update the access time of the +file. @xref{File Times}. This is used by programs that do backups, so +that backing a file up does not count as reading it. +Only the owner of the file or the superuser may use this bit. + +This is a GNU extension. +@end deftypevr + +@node Getting File Status Flags +@subsection Getting and Setting File Status Flags + +The @code{fcntl} function can fetch or change file status flags. + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_GETFL +This macro is used as the @var{command} argument to @code{fcntl}, to +read the file status flags for the open file with descriptor +@var{filedes}. + +The normal return value from @code{fcntl} with this command is a +nonnegative number which can be interpreted as the bitwise OR of the +individual flags. Since the file access modes are not single-bit values, +you can mask off other bits in the returned flags with @code{O_ACCMODE} +to compare them. + +In case of an error, @code{fcntl} returns @math{-1}. The following +@code{errno} error conditions are defined for this command: + +@table @code +@item EBADF +The @var{filedes} argument is invalid. +@end table +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_SETFL +This macro is used as the @var{command} argument to @code{fcntl}, to set +the file status flags for the open file corresponding to the +@var{filedes} argument. 
This command requires a third @code{int} +argument to specify the new flags, so the call looks like this: + +@smallexample +fcntl (@var{filedes}, F_SETFL, @var{new-flags}) +@end smallexample + +You can't change the access mode for the file in this way; that is, +whether the file descriptor was opened for reading or writing. + +The normal return value from @code{fcntl} with this command is an +unspecified value other than @math{-1}, which indicates an error. The +error conditions are the same as for the @code{F_GETFL} command. +@end deftypevr + +If you want to modify the file status flags, you should get the current +flags with @code{F_GETFL} and modify the value. Don't assume that the +flags listed here are the only ones that are implemented; your program +may be run years from now and more flags may exist then. For example, +here is a function to set or clear the flag @code{O_NONBLOCK} without +altering any other flags: + +@smallexample +@group +/* @r{Set the @code{O_NONBLOCK} flag of @var{desc} if @var{value} is nonzero,} + @r{or clear the flag if @var{value} is 0.} + @r{Return 0 on success, or -1 on error with @code{errno} set.} */ + +int +set_nonblock_flag (int desc, int value) +@{ + int oldflags = fcntl (desc, F_GETFL, 0); + /* @r{If reading the flags failed, return error indication now.} */ + if (oldflags == -1) + return -1; + /* @r{Set just the flag we want to set.} */ + if (value != 0) + oldflags |= O_NONBLOCK; + else + oldflags &= ~O_NONBLOCK; + /* @r{Store modified flag word in the descriptor.} */ + return fcntl (desc, F_SETFL, oldflags); +@} +@end group +@end smallexample + +@node File Locks +@section File Locks + +@cindex file locks +@cindex record locking +This section describes record locks that are associated with the process. +There is also a different type of record lock that is associated with the +open file description instead of the process. @xref{Open File Description Locks}. 
+ +The remaining @code{fcntl} commands are used to support @dfn{record +locking}, which permits multiple cooperating programs to prevent each +other from simultaneously accessing parts of a file in error-prone +ways. + +@cindex exclusive lock +@cindex write lock +An @dfn{exclusive} or @dfn{write} lock gives a process exclusive access +for writing to the specified part of the file. While a write lock is in +place, no other process can lock that part of the file. + +@cindex shared lock +@cindex read lock +A @dfn{shared} or @dfn{read} lock prohibits any other process from +requesting a write lock on the specified part of the file. However, +other processes can request read locks. + +The @code{read} and @code{write} functions do not actually check to see +whether there are any locks in place. If you want to implement a +locking protocol for a file shared by multiple processes, your application +must do explicit @code{fcntl} calls to request and clear locks at the +appropriate points. + +Locks are associated with processes. A process can only have one kind +of lock set for each byte of a given file. When any file descriptor for +that file is closed by the process, all of the locks that process holds +on that file are released, even if the locks were made using other +descriptors that remain open. Likewise, locks are released when a +process exits, and are not inherited by child processes created using +@code{fork} (@pxref{Creating a Process}). + +When making a lock, use a @code{struct flock} to specify what kind of +lock and where. This data type and the associated macros for the +@code{fcntl} function are declared in the header file @file{fcntl.h}. +@pindex fcntl.h + +@comment fcntl.h +@comment POSIX.1 +@deftp {Data Type} {struct flock} +This structure is used with the @code{fcntl} function to describe a file +lock. It has these members: + +@table @code +@item short int l_type +Specifies the type of the lock; one of @code{F_RDLCK}, @code{F_WRLCK}, or +@code{F_UNLCK}. 
+ +@item short int l_whence +This corresponds to the @var{whence} argument to @code{fseek} or +@code{lseek}, and specifies what the offset is relative to. Its value +can be one of @code{SEEK_SET}, @code{SEEK_CUR}, or @code{SEEK_END}. + +@item off_t l_start +This specifies the offset of the start of the region to which the lock +applies, and is given in bytes relative to the point specified by the +@code{l_whence} member. + +@item off_t l_len +This specifies the length of the region to be locked. A value of +@code{0} is treated specially; it means the region extends to the end of +the file. + +@item pid_t l_pid +This field is the process ID (@pxref{Process Creation Concepts}) of the +process holding the lock. It is filled in by calling @code{fcntl} with +the @code{F_GETLK} command, but is ignored when making a lock. If the +conflicting lock is an open file description lock +(@pxref{Open File Description Locks}), then this field will be set to +@math{-1}. +@end table +@end deftp + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_GETLK +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should get information about a lock. This command +requires a third argument of type @w{@code{struct flock *}} to be passed +to @code{fcntl}, so that the form of the call is: + +@smallexample +fcntl (@var{filedes}, F_GETLK, @var{lockp}) +@end smallexample + +If there is a lock already in place that would block the lock described +by the @var{lockp} argument, information about that lock overwrites +@code{*@var{lockp}}. Existing locks are not reported if they are +compatible with making a new lock as specified. Thus, you should +specify a lock type of @code{F_WRLCK} if you want to find out about both +read and write locks, or @code{F_RDLCK} if you want to find out about +write locks only. 
+ +There might be more than one lock affecting the region specified by the +@var{lockp} argument, but @code{fcntl} only returns information about +one of them. The @code{l_whence} member of the @var{lockp} structure is +set to @code{SEEK_SET} and the @code{l_start} and @code{l_len} fields +set to identify the locked region. + +If no lock applies, the only change to the @var{lockp} structure is to +update the @code{l_type} to a value of @code{F_UNLCK}. + +The normal return value from @code{fcntl} with this command is an +unspecified value other than @math{-1}, which is reserved to indicate an +error. The following @code{errno} error conditions are defined for +this command: + +@table @code +@item EBADF +The @var{filedes} argument is invalid. + +@item EINVAL +Either the @var{lockp} argument doesn't specify valid lock information, +or the file associated with @var{filedes} doesn't support locks. +@end table +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_SETLK +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should set or clear a lock. This command requires a +third argument of type @w{@code{struct flock *}} to be passed to +@code{fcntl}, so that the form of the call is: + +@smallexample +fcntl (@var{filedes}, F_SETLK, @var{lockp}) +@end smallexample + +If the process already has a lock on any part of the region, the old lock +on that part is replaced with the new lock. You can remove a lock +by specifying a lock type of @code{F_UNLCK}. + +If the lock cannot be set, @code{fcntl} returns immediately with a value +of @math{-1}. This function does not block while waiting for other processes +to release locks. If @code{fcntl} succeeds, it returns a value other +than @math{-1}. + +The following @code{errno} error conditions are defined for this +function: + +@table @code +@item EAGAIN +@itemx EACCES +The lock cannot be set because it is blocked by an existing lock on the +file. 
Some systems use @code{EAGAIN} in this case, and other systems +use @code{EACCES}; your program should treat them alike, after +@code{F_SETLK}. (@gnulinuxhurdsystems{} always use @code{EAGAIN}.) + +@item EBADF +Either: the @var{filedes} argument is invalid; you requested a read lock +but the @var{filedes} is not open for read access; or, you requested a +write lock but the @var{filedes} is not open for write access. + +@item EINVAL +Either the @var{lockp} argument doesn't specify valid lock information, +or the file associated with @var{filedes} doesn't support locks. + +@item ENOLCK +The system has run out of file lock resources; there are already too +many file locks in place. + +Well-designed file systems never report this error, because they have no +limitation on the number of locks. However, you must still take account +of the possibility of this error, as it could result from network access +to a file system on another machine. +@end table +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_SETLKW +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should set or clear a lock. It is just like the +@code{F_SETLK} command, but causes the process to block (or wait) +until the request can be satisfied. + +This command requires a third argument of type @code{struct flock *}, as +for the @code{F_SETLK} command. + +The @code{fcntl} return values and errors are the same as for the +@code{F_SETLK} command, but these additional @code{errno} error conditions +are defined for this command: + +@table @code +@item EINTR +The function was interrupted by a signal while it was waiting. +@xref{Interrupted Primitives}. + +@item EDEADLK +The specified region is being locked by another process. But that +process is waiting to lock a region which the current process has +locked, so waiting for the lock would result in deadlock. 
The system +does not guarantee that it will detect all such conditions, but it lets +you know if it notices one. +@end table +@end deftypevr + + +The following macros are defined for use as values for the @code{l_type} +member of the @code{flock} structure. The values are integer constants. + +@vtable @code +@comment fcntl.h +@comment POSIX.1 +@item F_RDLCK +This macro is used to specify a read (or shared) lock. + +@comment fcntl.h +@comment POSIX.1 +@item F_WRLCK +This macro is used to specify a write (or exclusive) lock. + +@comment fcntl.h +@comment POSIX.1 +@item F_UNLCK +This macro is used to specify that the region is unlocked. +@end vtable + +As an example of a situation where file locking is useful, consider a +program that can be run simultaneously by several different users, that +logs status information to a common file. One example of such a program +might be a game that uses a file to keep track of high scores. Another +example might be a program that records usage or accounting information +for billing purposes. + +Having multiple copies of the program simultaneously writing to the +file could cause the contents of the file to become mixed up. But +you can prevent this kind of problem by setting a write lock on the +file before actually writing to the file. + +If the program also needs to read the file and wants to make sure that +the contents of the file are in a consistent state, then it can also use +a read lock. While the read lock is set, no other process can lock +that part of the file for writing. + +@c ??? This section could use an example program. + +Remember that file locks are only an @emph{advisory} protocol for +controlling access to a file. There is still potential for access to +the file by programs that don't use the lock protocol. 
+ +@node Open File Description Locks +@section Open File Description Locks + +In contrast to process-associated record locks (@pxref{File Locks}), +open file description record locks are associated with an open file +description rather than a process. + +Using @code{fcntl} to apply an open file description lock on a region that +already has an existing open file description lock that was created via the +same file descriptor will never cause a lock conflict. + +Open file description locks are also inherited by child processes across +@code{fork}, or @code{clone} with @code{CLONE_FILES} set +(@pxref{Creating a Process}), along with the file descriptor. + +It is important to distinguish between the open file @emph{description} (an +instance of an open file, usually created by a call to @code{open}) and +an open file @emph{descriptor}, which is a numeric value that refers to the +open file description. The locks described here are associated with the +open file @emph{description} and not the open file @emph{descriptor}. + +Using @code{dup} (@pxref{Duplicating Descriptors}) to copy a file +descriptor does not give you a new open file description, but rather copies a +reference to an existing open file description and assigns it to a new +file descriptor. Thus, open file description locks set on a file +descriptor cloned by @code{dup} will never conflict with open file +description locks set on the original descriptor since they refer to the +same open file description. Depending on the range and type of lock +involved, the original lock may be modified by a @code{F_OFD_SETLK} or +@code{F_OFD_SETLKW} command in this situation however. + +Open file description locks always conflict with process-associated locks, +even if acquired by the same process or on the same open file +descriptor. 
+ +Open file description locks use the same @code{struct flock} as +process-associated locks as an argument (@pxref{File Locks}) and the +macros for the @code{command} values are also declared in the header file +@file{fcntl.h}. To use them, the macro @code{_GNU_SOURCE} must be +defined prior to including any header file. + +In contrast to process-associated locks, any @code{struct flock} used as +an argument to open file description lock commands must have the @code{l_pid} +value set to @math{0}. Also, when returning information about an +open file description lock in a @code{F_GETLK} or @code{F_OFD_GETLK} request, +the @code{l_pid} field in @code{struct flock} will be set to @math{-1} +to indicate that the lock is not associated with a process. + +When the same @code{struct flock} is reused as an argument to a +@code{F_OFD_SETLK} or @code{F_OFD_SETLKW} request after being used for an +@code{F_OFD_GETLK} request, it is necessary to inspect and reset the +@code{l_pid} field to @math{0}. + +@pindex fcntl.h + +@deftypevr Macro int F_OFD_GETLK +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should get information about a lock. This command +requires a third argument of type @w{@code{struct flock *}} to be passed +to @code{fcntl}, so that the form of the call is: + +@smallexample +fcntl (@var{filedes}, F_OFD_GETLK, @var{lockp}) +@end smallexample + +If there is a lock already in place that would block the lock described +by the @var{lockp} argument, information about that lock is written to +@code{*@var{lockp}}. Existing locks are not reported if they are +compatible with making a new lock as specified. Thus, you should +specify a lock type of @code{F_WRLCK} if you want to find out about both +read and write locks, or @code{F_RDLCK} if you want to find out about +write locks only. 
+ +There might be more than one lock affecting the region specified by the +@var{lockp} argument, but @code{fcntl} only returns information about +one of them. Which lock is returned in this situation is undefined. + +The @code{l_whence} member of the @var{lockp} structure is set to +@code{SEEK_SET} and the @code{l_start} and @code{l_len} fields are set +to identify the locked region. + +If no conflicting lock exists, the only change to the @var{lockp} structure +is to update the @code{l_type} field to the value @code{F_UNLCK}. + +The normal return value from @code{fcntl} with this command is either @math{0} +on success or @math{-1}, which indicates an error. The following @code{errno} +error conditions are defined for this command: + +@table @code +@item EBADF +The @var{filedes} argument is invalid. + +@item EINVAL +Either the @var{lockp} argument doesn't specify valid lock information, +the operating system kernel doesn't support open file description locks, or the file +associated with @var{filedes} doesn't support locks. +@end table +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_OFD_SETLK +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should set or clear a lock. This command requires a +third argument of type @w{@code{struct flock *}} to be passed to +@code{fcntl}, so that the form of the call is: + +@smallexample +fcntl (@var{filedes}, F_OFD_SETLK, @var{lockp}) +@end smallexample + +If the open file already has a lock on any part of the +region, the old lock on that part is replaced with the new lock. You +can remove a lock by specifying a lock type of @code{F_UNLCK}. + +If the lock cannot be set, @code{fcntl} returns immediately with a value +of @math{-1}. This command does not wait for other tasks +to release locks. If @code{fcntl} succeeds, it returns @math{0}. 
+ +The following @code{errno} error conditions are defined for this +command: + +@table @code +@item EAGAIN +The lock cannot be set because it is blocked by an existing lock on the +file. + +@item EBADF +Either: the @var{filedes} argument is invalid; you requested a read lock +but the @var{filedes} is not open for read access; or, you requested a +write lock but the @var{filedes} is not open for write access. + +@item EINVAL +Either the @var{lockp} argument doesn't specify valid lock information, +the operating system kernel doesn't support open file description locks, or the +file associated with @var{filedes} doesn't support locks. + +@item ENOLCK +The system has run out of file lock resources; there are already too +many file locks in place. + +Well-designed file systems never report this error, because they have no +limitation on the number of locks. However, you must still take account +of the possibility of this error, as it could result from network access +to a file system on another machine. +@end table +@end deftypevr + +@comment fcntl.h +@comment POSIX.1 +@deftypevr Macro int F_OFD_SETLKW +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should set or clear a lock. It is just like the +@code{F_OFD_SETLK} command, but causes the process to wait until the request +can be completed. + +This command requires a third argument of type @code{struct flock *}, as +for the @code{F_OFD_SETLK} command. + +The @code{fcntl} return values and errors are the same as for the +@code{F_OFD_SETLK} command, but these additional @code{errno} error conditions +are defined for this command: + +@table @code +@item EINTR +The function was interrupted by a signal while it was waiting. +@xref{Interrupted Primitives}. + +@end table +@end deftypevr + +Open file description locks are useful in the same sorts of situations as +process-associated locks. 
They can also be used to synchronize file +access between threads within the same process by having each thread perform +its own @code{open} of the file, to obtain its own open file description. + +Because open file description locks are automatically freed only upon +closing the last file descriptor that refers to the open file +description, this locking mechanism avoids the possibility that locks +are inadvertently released due to a library routine opening and closing +a file without the application being aware. + +As with process-associated locks, open file description locks are advisory. + +@node Open File Description Locks Example +@section Open File Description Locks Example + +Here is an example of using open file description locks in a threaded +program. If this program used process-associated locks, then it would be +subject to data corruption because process-associated locks are shared +by the threads inside a process, and thus cannot be used by one thread +to lock out another thread in the same process. + +Proper error handling has been omitted in the following program for +brevity. + +@smallexample +@include ofdlocks.c.texi +@end smallexample + +This example creates three threads, each of which loops five times, +appending to the file. Access to the file is serialized via open file +description locks. If we compile and run the above program, we'll end up +with a file @file{/tmp/foo} that has 15 lines in it. + +If we, however, were to replace the @code{F_OFD_SETLK} and +@code{F_OFD_SETLKW} commands with their process-associated lock +equivalents, the locking essentially becomes a no-op since it is all done +within the context of the same process. That leads to data corruption +(typically manifested as missing lines) as some threads race in and +overwrite the data written by others.
+ +@node Interrupt Input +@section Interrupt-Driven Input + +@cindex interrupt-driven input +If you set the @code{O_ASYNC} status flag on a file descriptor +(@pxref{File Status Flags}), a @code{SIGIO} signal is sent whenever +input or output becomes possible on that file descriptor. The process +or process group to receive the signal can be selected by using the +@code{F_SETOWN} command to the @code{fcntl} function. If the file +descriptor is a socket, this also selects the recipient of @code{SIGURG} +signals that are delivered when out-of-band data arrives on that socket; +see @ref{Out-of-Band Data}. (@code{SIGURG} is sent in any situation +where @code{select} would report the socket as having an ``exceptional +condition''. @xref{Waiting for I/O}.) + +If the file descriptor corresponds to a terminal device, then @code{SIGIO} +signals are sent to the foreground process group of the terminal. +@xref{Job Control}. + +@pindex fcntl.h +The symbols in this section are defined in the header file +@file{fcntl.h}. + +@comment fcntl.h +@comment BSD +@deftypevr Macro int F_GETOWN +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should get information about the process or process +group to which @code{SIGIO} signals are sent. (For a terminal, this is +actually the foreground process group ID, which you can get using +@code{tcgetpgrp}; see @ref{Terminal Access Functions}.) + +The return value is interpreted as a process ID; if negative, its +absolute value is the process group ID. + +The following @code{errno} error condition is defined for this command: + +@table @code +@item EBADF +The @var{filedes} argument is invalid. +@end table +@end deftypevr + +@comment fcntl.h +@comment BSD +@deftypevr Macro int F_SETOWN +This macro is used as the @var{command} argument to @code{fcntl}, to +specify that it should set the process or process group to which +@code{SIGIO} signals are sent. 
This command requires a third argument +of type @code{pid_t} to be passed to @code{fcntl}, so that the form of +the call is: + +@smallexample +fcntl (@var{filedes}, F_SETOWN, @var{pid}) +@end smallexample + +The @var{pid} argument should be a process ID. You can also pass a +negative number whose absolute value is a process group ID. + +The return value from @code{fcntl} with this command is @math{-1} +in case of error and some other value if successful. The following +@code{errno} error conditions are defined for this command: + +@table @code +@item EBADF +The @var{filedes} argument is invalid. + +@item ESRCH +There is no process or process group corresponding to @var{pid}. +@end table +@end deftypevr + +@c ??? This section could use an example program. + +@node IOCTLs +@section Generic I/O Control operations +@cindex generic i/o control operations +@cindex IOCTLs + +@gnusystems{} can handle most input/output operations on many different +devices and objects in terms of a few file primitives - @code{read}, +@code{write} and @code{lseek}. However, most devices also have a few +peculiar operations which do not fit into this model. Examples include: + +@itemize @bullet + +@item +Changing the character font used on a terminal. + +@item +Telling a magnetic tape system to rewind or fast forward. (Since they +cannot move in byte increments, @code{lseek} is inapplicable.) + +@item +Ejecting a disk from a drive. + +@item +Playing an audio track from a CD-ROM drive. + +@item +Maintaining routing tables for a network. + +@end itemize + +Although some of these objects, such as sockets and terminals +@footnote{Actually, the terminal-specific functions are implemented with +IOCTLs on many platforms.} have special functions of their own, it would +not be practical to create functions for all these cases. + +Instead these minor operations, known as @dfn{IOCTL}s, are assigned code +numbers and multiplexed through the @code{ioctl} function, defined in +@file{sys/ioctl.h}.
The code numbers themselves are defined in many +different headers. + +@comment sys/ioctl.h +@comment BSD +@deftypefun int ioctl (int @var{filedes}, int @var{command}, @dots{}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The @code{ioctl} function performs the generic I/O operation +@var{command} on @var{filedes}. + +A third argument is usually present, either a single number or a pointer +to a structure. The meaning of this argument, the returned value, and +any error codes depend upon the command used. Often @math{-1} is +returned for a failure. + +@end deftypefun + +On some systems, IOCTLs used by different devices share the same numbers. +Thus, although use of an inappropriate IOCTL @emph{usually} only produces +an error, you should not attempt to use device-specific IOCTLs on an +unknown device. + +Most IOCTLs are OS-specific and/or only used in special system utilities, +and are thus beyond the scope of this document. For an example of the use +of an IOCTL, see @ref{Out-of-Band Data}. + +@c FIXME this is undocumented: +@c dup3 diff --git a/REORG.TODO/manual/locale.texi b/REORG.TODO/manual/locale.texi new file mode 100644 index 0000000000..ae71ccc906 --- /dev/null +++ b/REORG.TODO/manual/locale.texi @@ -0,0 +1,1468 @@ +@node Locales, Message Translation, Character Set Handling, Top +@c %MENU% The country and language can affect the behavior of library functions +@chapter Locales and Internationalization + +Different countries and cultures have varying conventions for how to +communicate. These conventions range from very simple ones, such as the +format for representing dates and times, to very complex ones, such as +the language spoken. + +@cindex internationalization +@cindex locales +@dfn{Internationalization} of software means programming it to be able +to adapt to the user's favorite conventions. In @w{ISO C}, +internationalization works by means of @dfn{locales}. Each locale +specifies a collection of conventions, one convention for each purpose.
+The user chooses a set of conventions by specifying a locale (via +environment variables). + +All programs inherit the chosen locale as part of their environment. +Provided the programs are written to obey the choice of locale, they +will follow the conventions preferred by the user. + +@menu +* Effects of Locale:: Actions affected by the choice of + locale. +* Choosing Locale:: How the user specifies a locale. +* Locale Categories:: Different purposes for which you can + select a locale. +* Setting the Locale:: How a program specifies the locale + with library functions. +* Standard Locales:: Locale names available on all systems. +* Locale Names:: Format of system-specific locale names. +* Locale Information:: How to access the information for the locale. +* Formatting Numbers:: A dedicated function to format numbers. +* Yes-or-No Questions:: Check a Response against the locale. +@end menu + +@node Effects of Locale, Choosing Locale, , Locales +@section What Effects a Locale Has + +Each locale specifies conventions for several purposes, including the +following: + +@itemize @bullet +@item +What multibyte character sequences are valid, and how they are +interpreted (@pxref{Character Set Handling}). + +@item +Classification of which characters in the local character set are +considered alphabetic, and upper- and lower-case conversion conventions +(@pxref{Character Handling}). + +@item +The collating sequence for the local language and character set +(@pxref{Collation Functions}). + +@item +Formatting of numbers and currency amounts (@pxref{General Numeric}). + +@item +Formatting of dates and times (@pxref{Formatting Calendar Time}). + +@item +What language to use for output, including error messages +(@pxref{Message Translation}). + +@item +What language to use for user answers to yes-or-no questions +(@pxref{Yes-or-No Questions}). + +@item +What language to use for more complex user input. +(The C library doesn't yet help you implement this.) 
+@end itemize + +Some aspects of adapting to the specified locale are handled +automatically by the library subroutines. For example, all your program +needs to do in order to use the collating sequence of the chosen locale +is to use @code{strcoll} or @code{strxfrm} to compare strings. + +Other aspects of locales are beyond the comprehension of the library. +For example, the library can't automatically translate your program's +output messages into other languages. The only way you can support +output in the user's favorite language is to program this more or less +by hand. The C library provides functions to handle translations for +multiple languages easily. + +This chapter discusses the mechanism by which you can modify the current +locale. The effects of the current locale on specific library functions +are discussed in more detail in the descriptions of those functions. + +@node Choosing Locale, Locale Categories, Effects of Locale, Locales +@section Choosing a Locale + +The simplest way for the user to choose a locale is to set the +environment variable @code{LANG}. This specifies a single locale to use +for all purposes. For example, a user could specify a hypothetical +locale named @samp{espana-castellano} to use the standard conventions of +most of Spain. + +The set of locales supported depends on the operating system you are +using, and so do their names, except that the standard locale called +@samp{C} or @samp{POSIX} always exists. @xref{Locale Names}. + +In order to force the system to always use the default locale, the +user can set the @code{LC_ALL} environment variable to @samp{C}. + +@cindex combining locales +A user also has the option of specifying different locales for +different purposes---in effect, choosing a mixture of multiple +locales. @xref{Locale Categories}. + +For example, the user might specify the locale @samp{espana-castellano} +for most purposes, but specify the locale @samp{usa-english} for +currency formatting.
This might make sense if the user is a +Spanish-speaking American, working in Spanish, but representing monetary +amounts in US dollars. + +Note that both locales @samp{espana-castellano} and @samp{usa-english}, +like all locales, would include conventions for all of the purposes to +which locales apply. However, the user can choose to use each locale +for a particular subset of those purposes. + +@node Locale Categories, Setting the Locale, Choosing Locale, Locales +@section Locale Categories +@cindex categories for locales +@cindex locale categories + +The purposes that locales serve are grouped into @dfn{categories}, so +that a user or a program can choose the locale for each category +independently. Here is a table of categories; each name is both an +environment variable that a user can set, and a macro name that you can +use as the first argument to @code{setlocale}. + +The contents of the environment variable (or the string in the second +argument to @code{setlocale}) has to be a valid locale name. +@xref{Locale Names}. + +@vtable @code +@comment locale.h +@comment ISO +@item LC_COLLATE +This category applies to collation of strings (functions @code{strcoll} +and @code{strxfrm}); see @ref{Collation Functions}. + +@comment locale.h +@comment ISO +@item LC_CTYPE +This category applies to classification and conversion of characters, +and to multibyte and wide characters; +see @ref{Character Handling}, and @ref{Character Set Handling}. + +@comment locale.h +@comment ISO +@item LC_MONETARY +This category applies to formatting monetary values; see @ref{General Numeric}. + +@comment locale.h +@comment ISO +@item LC_NUMERIC +This category applies to formatting numeric values that are not +monetary; see @ref{General Numeric}. + +@comment locale.h +@comment ISO +@item LC_TIME +This category applies to formatting date and time values; see +@ref{Formatting Calendar Time}. 
+ +@comment locale.h +@comment XOPEN +@item LC_MESSAGES +This category applies to selecting the language used in the user +interface for message translation (@pxref{The Uniforum approach}; +@pxref{Message catalogs a la X/Open}) and contains regular expressions +for affirmative and negative responses. + +@comment locale.h +@comment ISO +@item LC_ALL +This is not a category; it is only a macro that you can use +with @code{setlocale} to set a single locale for all purposes. Setting +this environment variable overwrites all selections by the other +@code{LC_*} variables or @code{LANG}. + +@comment locale.h +@comment ISO +@item LANG +If this environment variable is defined, its value specifies the locale +to use for all purposes except as overridden by the variables above. +@end vtable + +@vindex LANGUAGE +When developing the message translation functions it was felt that the +functionality provided by the variables above is not sufficient. For +example, it should be possible to specify more than one locale name. +Take a Swedish user who better speaks German than English, and a program +whose messages are output in English by default. It should be possible +to specify that the first choice of language is Swedish, the second +German, and if this also fails to use English. This is +possible with the variable @code{LANGUAGE}. For further description of +this GNU extension see @ref{Using gettextized software}. + +@node Setting the Locale, Standard Locales, Locale Categories, Locales +@section How Programs Set the Locale + +A C program inherits its locale environment variables when it starts up. +This happens automatically. However, these variables do not +automatically control the locale used by the library functions, because +@w{ISO C} says that all programs start by default in the standard @samp{C} +locale. To use the locales specified by the environment, you must call +@code{setlocale}. 
Call it as follows: + +@smallexample +setlocale (LC_ALL, ""); +@end smallexample + +@noindent +to select a locale based on the user choice of the appropriate +environment variables. + +@cindex changing the locale +@cindex locale, changing +You can also use @code{setlocale} to specify a particular locale, for +general use or for a specific category. + +@pindex locale.h +The symbols in this section are defined in the header file @file{locale.h}. + +@comment locale.h +@comment ISO +@deftypefun {char *} setlocale (int @var{category}, const char *@var{locale}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtslocale{}} @mtsenv{}}@asunsafe{@asuinit{} @asulock{} @ascuheap{} @asucorrupt{}}@acunsafe{@acuinit{} @acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c Uses of the global locale object are unguarded in functions that +@c ought to be MT-Safe, so we're ruling out the use of this function +@c once threads are started. It takes a write lock itself, but it may +@c return a pointer loaded from the global locale object after releasing +@c the lock, or before taking it. 
+@c setlocale @mtasuconst:@mtslocale @mtsenv @asuinit @ascuheap @asulock @asucorrupt @acucorrupt @acsmem @acsfd @aculock +@c libc_rwlock_wrlock @asulock @aculock +@c libc_rwlock_unlock @aculock +@c getenv LOCPATH @mtsenv +@c malloc @ascuheap @acsmem +@c free @ascuheap @acsmem +@c new_composite_name ok +@c setdata ok +@c setname ok +@c _nl_find_locale @mtsenv @asuinit @ascuheap @asulock @asucorrupt @acucorrupt @acsmem @acsfd @aculock +@c getenv LC_ALL and LANG @mtsenv +@c _nl_load_locale_from_archive @ascuheap @acucorrupt @acsmem @acsfd +@c sysconf _SC_PAGE_SIZE ok +@c _nl_normalize_codeset @ascuheap @acsmem +@c isalnum_l ok (C locale) +@c isdigit_l ok (C locale) +@c malloc @ascuheap @acsmem +@c tolower_l ok (C locale) +@c open_not_cancel_2 @acsfd +@c fxstat64 ok +@c close_not_cancel_no_status ok +@c __mmap64 @acsmem +@c calculate_head_size ok +@c __munmap ok +@c compute_hashval ok +@c qsort dup @acucorrupt +@c rangecmp ok +@c malloc @ascuheap @acsmem +@c strdup @ascuheap @acsmem +@c _nl_intern_locale_data @ascuheap @acsmem +@c malloc @ascuheap @acsmem +@c free @ascuheap @acsmem +@c _nl_expand_alias @ascuheap @asulock @acsmem @acsfd @aculock +@c libc_lock_lock @asulock @aculock +@c bsearch ok +@c alias_compare ok +@c strcasecmp ok +@c read_alias_file @ascuheap @asulock @acsmem @acsfd @aculock +@c fopen @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking ok +@c feof_unlocked ok +@c fgets_unlocked ok +@c isspace ok (locale mutex is locked) +@c extend_alias_table @ascuheap @acsmem +@c realloc @ascuheap @acsmem +@c realloc @ascuheap @acsmem +@c fclose @ascuheap @asulock @acsmem @acsfd @aculock +@c qsort @ascuheap @acsmem +@c alias_compare dup +@c libc_lock_unlock @aculock +@c _nl_explode_name @ascuheap @acsmem +@c _nl_find_language ok +@c _nl_normalize_codeset dup @ascuheap @acsmem +@c _nl_make_l10nflist @ascuheap @acsmem +@c malloc @ascuheap @acsmem +@c free @ascuheap @acsmem +@c __argz_stringify ok +@c __argz_count ok +@c __argz_next ok +@c _nl_load_locale 
@ascuheap @acsmem @acsfd +@c open_not_cancel_2 @acsfd +@c __fxstat64 ok +@c close_not_cancel_no_status ok +@c mmap @acsmem +@c malloc @ascuheap @acsmem +@c read_not_cancel ok +@c free @ascuheap @acsmem +@c _nl_intern_locale_data dup @ascuheap @acsmem +@c munmap ok +@c __gconv_compare_alias @asuinit @ascuheap @asucorrupt @asulock @acsmem@acucorrupt @acsfd @aculock +@c __gconv_read_conf @asuinit @ascuheap @asucorrupt @asulock @acsmem@acucorrupt @acsfd @aculock +@c (libc_once-initializes gconv_cache and gconv_path_envvar; they're +@c never modified afterwards) +@c __gconv_load_cache @ascuheap @acsmem @acsfd +@c getenv GCONV_PATH @mtsenv +@c open_not_cancel @acsfd +@c __fxstat64 ok +@c close_not_cancel_no_status ok +@c mmap @acsmem +@c malloc @ascuheap @acsmem +@c __read ok +@c free @ascuheap @acsmem +@c munmap ok +@c __gconv_get_path @asulock @ascuheap @aculock @acsmem @acsfd +@c getcwd @ascuheap @acsmem @acsfd +@c libc_lock_lock @asulock @aculock +@c malloc @ascuheap @acsmem +@c strtok_r ok +@c libc_lock_unlock @aculock +@c read_conf_file @ascuheap @asucorrupt @asulock @acsmem @acucorrupt @acsfd @aculock +@c fopen @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking ok +@c feof_unlocked ok +@c getdelim @ascuheap @asucorrupt @acsmem @acucorrupt +@c isspace_l ok (C locale) +@c add_alias +@c isspace_l ok (C locale) +@c toupper_l ok (C locale) +@c add_alias2 dup @ascuheap @acucorrupt @acsmem +@c add_module @ascuheap @acsmem +@c isspace_l ok (C locale) +@c toupper_l ok (C locale) +@c strtol ok (@mtslocale but we hold the locale lock) +@c tfind __gconv_alias_db ok +@c __gconv_alias_compare dup ok +@c calloc @ascuheap @acsmem +@c insert_module dup @ascuheap +@c __tfind ok (because the tree is read only by then) +@c __gconv_alias_compare dup ok +@c insert_module @ascuheap +@c free @ascuheap +@c add_alias2 @ascuheap @acucorrupt @acsmem +@c detect_conflict ok, reads __gconv_modules_db +@c malloc @ascuheap @acsmem +@c tsearch __gconv_alias_db @ascuheap @acucorrupt @acsmem 
[exclusive tree, no @mtsrace] +@c __gconv_alias_compare ok +@c free @ascuheap +@c __gconv_compare_alias_cache ok +@c find_module_idx ok +@c do_lookup_alias ok +@c __tfind ok (because the tree is read only by then) +@c __gconv_alias_compare ok +@c strndup @ascuheap @acsmem +@c strcasecmp_l ok (C locale) +The function @code{setlocale} sets the current locale for category +@var{category} to @var{locale}. + +If @var{category} is @code{LC_ALL}, this specifies the locale for all +purposes. The other possible values of @var{category} specify a +single purpose (@pxref{Locale Categories}). + +You can also use this function to find out the current locale by passing +a null pointer as the @var{locale} argument. In this case, +@code{setlocale} returns a string that is the name of the locale +currently selected for category @var{category}. + +The string returned by @code{setlocale} can be overwritten by subsequent +calls, so you should make a copy of the string (@pxref{Copying Strings +and Arrays}) if you want to save it past any further calls to +@code{setlocale}. (The standard library is guaranteed never to call +@code{setlocale} itself.) + +You should not modify the string returned by @code{setlocale}. It might +be the same string that was passed as an argument in a previous call to +@code{setlocale}. One requirement is that the @var{category} must be +the same in the call that returned the string and in the later call in +which the string is passed as the @var{locale} parameter. + +When you read the current locale for category @code{LC_ALL}, the value +encodes the entire combination of selected locales for all categories. +If you specify the same ``locale name'' with @code{LC_ALL} in a +subsequent call to @code{setlocale}, it restores the same combination +of locale selections. + +To be sure you can use the returned string encoding the currently selected +locale at a later time, you must make a copy of the string. It is not +guaranteed that the returned pointer remains valid over time.
+ +When the @var{locale} argument is not a null pointer, the string returned +by @code{setlocale} reflects the newly-modified locale. + +If you specify an empty string for @var{locale}, this means to read the +appropriate environment variable and use its value to select the locale +for @var{category}. + +If a nonempty string is given for @var{locale}, then the locale of that +name is used if possible. + +The effective locale name (either the second argument to +@code{setlocale}, or if the argument is an empty string, the name +obtained from the process environment) must be a valid locale name. +@xref{Locale Names}. + +If you specify an invalid locale name, @code{setlocale} returns a null +pointer and leaves the current locale unchanged. +@end deftypefun + +Here is an example showing how you might use @code{setlocale} to +temporarily switch to a new locale. + +@smallexample +#include <stddef.h> +#include <locale.h> +#include <stdlib.h> +#include <string.h> + +void +with_other_locale (char *new_locale, + void (*subroutine) (int), + int argument) +@{ + char *old_locale, *saved_locale; + + /* @r{Get the name of the current locale.} */ + old_locale = setlocale (LC_ALL, NULL); + + /* @r{Copy the name so it won't be clobbered by @code{setlocale}.} */ + saved_locale = strdup (old_locale); + if (saved_locale == NULL) + fatal ("Out of memory"); + + /* @r{Now change the locale and do some stuff with it.} */ + setlocale (LC_ALL, new_locale); + (*subroutine) (argument); + + /* @r{Restore the original locale.} */ + setlocale (LC_ALL, saved_locale); + free (saved_locale); +@} +@end smallexample + +@strong{Portability Note:} Some @w{ISO C} systems may define additional +locale categories, and future versions of the library will do so. For +portability, assume that any symbol beginning with @samp{LC_} might be +defined in @file{locale.h}. 
+ +@node Standard Locales, Locale Names, Setting the Locale, Locales +@section Standard Locales + +The only locale names you can count on finding on all operating systems +are these three standard ones: + +@table @code +@item "C" +This is the standard C locale. The attributes and behavior it provides +are specified in the @w{ISO C} standard. When your program starts up, it +initially uses this locale by default. + +@item "POSIX" +This is the standard POSIX locale. Currently, it is an alias for the +standard C locale. + +@item "" +The empty name says to select a locale based on environment variables. +@xref{Locale Categories}. +@end table + +Defining and installing named locales is normally a responsibility of +the system administrator at your site (or the person who installed +@theglibc{}). It is also possible for the user to create private +locales. All this will be discussed later when describing the tool to +do so. +@comment (@pxref{Building Locale Files}). + +If your program needs to use something other than the @samp{C} locale, +it will be more portable if you use whatever locale the user specifies +with the environment, rather than trying to specify some non-standard +locale explicitly by name. Remember, different machines might have +different sets of locales installed. + +@node Locale Names, Locale Information, Standard Locales, Locales +@section Locale Names + +The following command prints a list of locales supported by the +system: + +@pindex locale +@smallexample + locale -a +@end smallexample + +@strong{Portability Note:} With the notable exception of the standard +locale names @samp{C} and @samp{POSIX}, locale names are +system-specific. + +Most locale names follow XPG syntax and consist of up to four parts: + +@smallexample +@var{language}[_@var{territory}[.@var{codeset}]][@@@var{modifier}] +@end smallexample + +Beside the first part, all of them are allowed to be missing. If the +full specified locale is not found, less specific ones are looked for. 
+The various parts will be stripped off, in the following order: + +@enumerate +@item +codeset +@item +normalized codeset +@item +territory +@item +modifier +@end enumerate + +For example, the locale name @samp{de_AT.iso885915@@euro} denotes a +German-language locale for use in Austria, using the ISO-8859-15 +(Latin-9) character set, and with the Euro as the currency symbol. + +In addition to locale names which follow XPG syntax, systems may +provide aliases such as @samp{german}. Both categories of names must +not contain the slash character @samp{/}. + +If the locale name starts with a slash @samp{/}, it is treated as a +path relative to the configured locale directories; see @code{LOCPATH} +below. The specified path must not contain a component @samp{..}, or +the name is invalid, and @code{setlocale} will fail. + +@strong{Portability Note:} POSIX suggests that if a locale name starts +with a slash @samp{/}, it is resolved as an absolute path. However, +@theglibc{} treats it as a relative path under the directories listed +in @code{LOCPATH} (or the default locale directory if @code{LOCPATH} +is unset). + +Locale names which are longer than an implementation-defined limit are +invalid and cause @code{setlocale} to fail. + +As a special case, locale names used with @code{LC_ALL} can combine +several locales, reflecting different locale settings for different +categories. For example, you might want to use a U.S. locale with ISO +A4 paper format, so you set @code{LANG} to @samp{en_US.UTF-8}, and +@code{LC_PAPER} to @samp{de_DE.UTF-8}. In this case, the +@code{LC_ALL}-style combined locale name is + +@smallexample +LC_CTYPE=en_US.UTF-8;LC_TIME=en_US.UTF-8;LC_PAPER=de_DE.UTF-8;@dots{} +@end smallexample + +followed by other category settings not shown here. + +@vindex LOCPATH +The path used for finding locale data can be set using the +@code{LOCPATH} environment variable. 
This variable lists the +directories in which to search for locale definitions, separated by a +colon @samp{:}. + +The default path for finding locale data is system specific. A typical +value for the @code{LOCPATH} default is: + +@smallexample +/usr/share/locale +@end smallexample + +The value of @code{LOCPATH} is ignored by privileged programs for +security reasons, and only the default directory is used. + +@node Locale Information, Formatting Numbers, Locale Names, Locales +@section Accessing Locale Information + +There are several ways to access locale information. The simplest +way is to let the C library itself do the work. Several of the +functions in this library implicitly access the locale data, and use +what information is provided by the currently selected locale. This is +how the locale model is meant to work normally. + +As an example take the @code{strftime} function, which is meant to nicely +format date and time information (@pxref{Formatting Calendar Time}). +Part of the standard information contained in the @code{LC_TIME} +category is the names of the months. Instead of requiring the +programmer to take care of providing the translations the +@code{strftime} function does this all by itself. @code{%A} +in the format string is replaced by the appropriate weekday +name of the locale currently selected by @code{LC_TIME}. This is an +easy example, and wherever possible functions do things automatically +in this way. + +But there are quite often situations when there is simply no function +to perform the task, or it is simply not possible to do the work +automatically. For these cases it is necessary to access the +information in the locale directly. To do this the C library provides +two functions: @code{localeconv} and @code{nl_langinfo}. The former is +part of @w{ISO C} and therefore portable, but has a brain-damaged +interface. The second is part of the Unix interface and is portable in +as far as the system follows the Unix standards. 
+ +@menu +* The Lame Way to Locale Data:: ISO C's @code{localeconv}. +* The Elegant and Fast Way:: X/Open's @code{nl_langinfo}. +@end menu + +@node The Lame Way to Locale Data, The Elegant and Fast Way, ,Locale Information +@subsection @code{localeconv}: It is portable but @dots{} + +Together with the @code{setlocale} function the @w{ISO C} people +invented the @code{localeconv} function. It is a masterpiece of poor +design. It is expensive to use, not extensible, and not generally +usable as it provides access to only @code{LC_MONETARY} and +@code{LC_NUMERIC} related information. Nevertheless, if it is +applicable to a given situation it should be used since it is very +portable. The function @code{strfmon} formats monetary amounts +according to the selected locale using this information. +@pindex locale.h +@cindex monetary value formatting +@cindex numeric value formatting + +@comment locale.h +@comment ISO +@deftypefun {struct lconv *} localeconv (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:localeconv} @mtslocale{}}@asunsafe{}@acsafe{}} +@c This function reads from multiple components of the locale object, +@c without synchronization, while writing to the static buffer it uses +@c as the return value. +The @code{localeconv} function returns a pointer to a structure whose +components contain information about how numeric and monetary values +should be formatted in the current locale. + +You should not modify the structure or its contents. The structure might +be overwritten by subsequent calls to @code{localeconv}, or by calls to +@code{setlocale}, but no other function in the library overwrites this +value. +@end deftypefun + +@comment locale.h +@comment ISO +@deftp {Data Type} {struct lconv} +@code{localeconv}'s return value is of this data type. Its elements are +described in the following subsections. 
+@end deftp + +If a member of the structure @code{struct lconv} has type @code{char}, +and the value is @code{CHAR_MAX}, it means that the current locale has +no value for that parameter. + +@menu +* General Numeric:: Parameters for formatting numbers and + currency amounts. +* Currency Symbol:: How to print the symbol that identifies an + amount of money (e.g. @samp{$}). +* Sign of Money Amount:: How to print the (positive or negative) sign + for a monetary amount, if one exists. +@end menu + +@node General Numeric, Currency Symbol, , The Lame Way to Locale Data +@subsubsection Generic Numeric Formatting Parameters + +These are the standard members of @code{struct lconv}; there may be +others. + +@table @code +@item char *decimal_point +@itemx char *mon_decimal_point +These are the decimal-point separators used in formatting non-monetary +and monetary quantities, respectively. In the @samp{C} locale, the +value of @code{decimal_point} is @code{"."}, and the value of +@code{mon_decimal_point} is @code{""}. +@cindex decimal-point separator + +@item char *thousands_sep +@itemx char *mon_thousands_sep +These are the separators used to delimit groups of digits to the left of +the decimal point in formatting non-monetary and monetary quantities, +respectively. In the @samp{C} locale, both members have a value of +@code{""} (the empty string). + +@item char *grouping +@itemx char *mon_grouping +These are strings that specify how to group the digits to the left of +the decimal point. @code{grouping} applies to non-monetary quantities +and @code{mon_grouping} applies to monetary quantities. Use either +@code{thousands_sep} or @code{mon_thousands_sep} to separate the digit +groups. +@cindex grouping of digits + +Each member of these strings is to be interpreted as an integer value of +type @code{char}. Successive numbers (from left to right) give the +sizes of successive groups (from right to left, starting at the decimal +point.) 
The last member is either @code{0}, in which case the previous +member is used over and over again for all the remaining groups, or +@code{CHAR_MAX}, in which case there is no more grouping---or, put +another way, any remaining digits form one large group without +separators. + +For example, if @code{grouping} is @code{"\04\03\02"}, the correct +grouping for the number @code{123456787654321} is @samp{12}, @samp{34}, +@samp{56}, @samp{78}, @samp{765}, @samp{4321}. This uses a group of 4 +digits at the end, preceded by a group of 3 digits, preceded by groups +of 2 digits (as many as needed). With a separator of @samp{,}, the +number would be printed as @samp{12,34,56,78,765,4321}. + +A value of @code{"\03"} indicates repeated groups of three digits, as +normally used in the U.S. + +In the standard @samp{C} locale, both @code{grouping} and +@code{mon_grouping} have a value of @code{""}. This value specifies no +grouping at all. + +@item char int_frac_digits +@itemx char frac_digits +These are small integers indicating how many fractional digits (to the +right of the decimal point) should be displayed in a monetary value in +international and local formats, respectively. (Most often, both +members have the same value.) + +In the standard @samp{C} locale, both of these members have the value +@code{CHAR_MAX}, meaning ``unspecified''. The ISO standard doesn't say +what to do when you find this value; we recommend printing no +fractional digits. (This locale also specifies the empty string for +@code{mon_decimal_point}, so printing any fractional digits would be +confusing!) +@end table + +@node Currency Symbol, Sign of Money Amount, General Numeric, The Lame Way to Locale Data +@subsubsection Printing the Currency Symbol +@cindex currency symbols + +These members of the @code{struct lconv} structure specify how to print +the symbol to identify a monetary value---the international analog of +@samp{$} for US dollars. + +Each country has two standard currency symbols. 
The @dfn{local currency +symbol} is used commonly within the country, while the +@dfn{international currency symbol} is used internationally to refer to +that country's currency when it is necessary to indicate the country +unambiguously. + +For example, many countries use the dollar as their monetary unit, and +when dealing with international currencies it's important to specify +that one is dealing with (say) Canadian dollars instead of U.S. dollars +or Australian dollars. But when the context is known to be Canada, +there is no need to make this explicit---dollar amounts are implicitly +assumed to be in Canadian dollars. + +@table @code +@item char *currency_symbol +The local currency symbol for the selected locale. + +In the standard @samp{C} locale, this member has a value of @code{""} +(the empty string), meaning ``unspecified''. The ISO standard doesn't +say what to do when you find this value; we recommend you simply print +the empty string as you would print any other string pointed to by this +variable. + +@item char *int_curr_symbol +The international currency symbol for the selected locale. + +The value of @code{int_curr_symbol} should normally consist of a +three-letter abbreviation determined by the international standard +@cite{ISO 4217 Codes for the Representation of Currency and Funds}, +followed by a one-character separator (often a space). + +In the standard @samp{C} locale, this member has a value of @code{""} +(the empty string), meaning ``unspecified''. We recommend you simply print +the empty string as you would print any other string pointed to by this +variable. + +@item char p_cs_precedes +@itemx char n_cs_precedes +@itemx char int_p_cs_precedes +@itemx char int_n_cs_precedes +These members are @code{1} if the @code{currency_symbol} or +@code{int_curr_symbol} strings should precede the value of a monetary +amount, or @code{0} if the strings should follow the value. 
The +@code{p_cs_precedes} and @code{int_p_cs_precedes} members apply to +positive amounts (or zero), and the @code{n_cs_precedes} and +@code{int_n_cs_precedes} members apply to negative amounts. + +In the standard @samp{C} locale, all of these members have a value of +@code{CHAR_MAX}, meaning ``unspecified''. The ISO standard doesn't say +what to do when you find this value. We recommend printing the +currency symbol before the amount, which is right for most countries. +In other words, treat all nonzero values alike in these members. + +The members with the @code{int_} prefix apply to the +@code{int_curr_symbol} while the other two apply to +@code{currency_symbol}. + +@item char p_sep_by_space +@itemx char n_sep_by_space +@itemx char int_p_sep_by_space +@itemx char int_n_sep_by_space +These members are @code{1} if a space should appear between the +@code{currency_symbol} or @code{int_curr_symbol} strings and the +amount, or @code{0} if no space should appear. The +@code{p_sep_by_space} and @code{int_p_sep_by_space} members apply to +positive amounts (or zero), and the @code{n_sep_by_space} and +@code{int_n_sep_by_space} members apply to negative amounts. + +In the standard @samp{C} locale, all of these members have a value of +@code{CHAR_MAX}, meaning ``unspecified''. The ISO standard doesn't say +what you should do when you find this value; we suggest you treat it as +1 (print a space). In other words, treat all nonzero values alike in +these members. + +The members with the @code{int_} prefix apply to the +@code{int_curr_symbol} while the other two apply to +@code{currency_symbol}. There is one specialty with the +@code{int_curr_symbol}, though. Since all legal values contain a space +at the end of the string one either prints this space (if the currency +symbol must appear in front and must be separated) or one has to avoid +printing this character at all (especially when at the end of the +string). 
+@end table + +@node Sign of Money Amount, , Currency Symbol, The Lame Way to Locale Data +@subsubsection Printing the Sign of a Monetary Amount + +These members of the @code{struct lconv} structure specify how to print +the sign (if any) of a monetary value. + +@table @code +@item char *positive_sign +@itemx char *negative_sign +These are strings used to indicate positive (or zero) and negative +monetary quantities, respectively. + +In the standard @samp{C} locale, both of these members have a value of +@code{""} (the empty string), meaning ``unspecified''. + +The ISO standard doesn't say what to do when you find this value; we +recommend printing @code{positive_sign} as you find it, even if it is +empty. For a negative value, print @code{negative_sign} as you find it +unless both it and @code{positive_sign} are empty, in which case print +@samp{-} instead. (Failing to indicate the sign at all seems rather +unreasonable.) + +@item char p_sign_posn +@itemx char n_sign_posn +@itemx char int_p_sign_posn +@itemx char int_n_sign_posn +These members are small integers that indicate how to +position the sign for nonnegative and negative monetary quantities, +respectively. (The string used for the sign is what was specified with +@code{positive_sign} or @code{negative_sign}.) The possible values are +as follows: + +@table @code +@item 0 +The currency symbol and quantity should be surrounded by parentheses. + +@item 1 +Print the sign string before the quantity and currency symbol. + +@item 2 +Print the sign string after the quantity and currency symbol. + +@item 3 +Print the sign string right before the currency symbol. + +@item 4 +Print the sign string right after the currency symbol. + +@item CHAR_MAX +``Unspecified''. Both members have this value in the standard +@samp{C} locale. +@end table + +The ISO standard doesn't say what you should do when the value is +@code{CHAR_MAX}. We recommend you print the sign after the currency +symbol. 
+ +The members with the @code{int_} prefix apply to the +@code{int_curr_symbol} while the other two apply to +@code{currency_symbol}. +@end table + +@node The Elegant and Fast Way, , The Lame Way to Locale Data, Locale Information +@subsection Pinpoint Access to Locale Data + +When writing the X/Open Portability Guide the authors realized that the +@code{localeconv} function is not enough to provide reasonable access to +locale information. The information which was meant to be available +in the locale (as later specified in the POSIX.1 standard) requires more +ways to access it. Therefore the @code{nl_langinfo} function +was introduced. + +@comment langinfo.h +@comment XOPEN +@deftypefun {char *} nl_langinfo (nl_item @var{item}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c It calls _nl_langinfo_l with the current locale, which returns a +@c pointer into constant strings defined in locale data structures. +The @code{nl_langinfo} function can be used to access individual +elements of the locale categories. Unlike the @code{localeconv} +function, which returns all the information, @code{nl_langinfo} +lets the caller select what information it requires. This is very +fast and it is not a problem to call this function multiple times. + +A second advantage is that in addition to the numeric and monetary +formatting information, information from the +@code{LC_TIME} and @code{LC_MESSAGES} categories is available. + +@pindex langinfo.h +The type @code{nl_item} is defined in @file{nl_types.h}. The argument +@var{item} is a numeric value defined in the header @file{langinfo.h}. +The X/Open standard defines the following values: + +@vtable @code +@item CODESET +@code{nl_langinfo} returns a string with the name of the coded character +set used in the selected locale. + +@item ABDAY_1 +@itemx ABDAY_2 +@itemx ABDAY_3 +@itemx ABDAY_4 +@itemx ABDAY_5 +@itemx ABDAY_6 +@itemx ABDAY_7 +@code{nl_langinfo} returns the abbreviated weekday name.
@code{ABDAY_1} +corresponds to Sunday. +@item DAY_1 +@itemx DAY_2 +@itemx DAY_3 +@itemx DAY_4 +@itemx DAY_5 +@itemx DAY_6 +@itemx DAY_7 +Similar to @code{ABDAY_1} etc., but here the return value is the +unabbreviated weekday name. +@item ABMON_1 +@itemx ABMON_2 +@itemx ABMON_3 +@itemx ABMON_4 +@itemx ABMON_5 +@itemx ABMON_6 +@itemx ABMON_7 +@itemx ABMON_8 +@itemx ABMON_9 +@itemx ABMON_10 +@itemx ABMON_11 +@itemx ABMON_12 +The return value is the abbreviated name of the month. @code{ABMON_1} +corresponds to January. +@item MON_1 +@itemx MON_2 +@itemx MON_3 +@itemx MON_4 +@itemx MON_5 +@itemx MON_6 +@itemx MON_7 +@itemx MON_8 +@itemx MON_9 +@itemx MON_10 +@itemx MON_11 +@itemx MON_12 +Similar to @code{ABMON_1} etc., but here the month names are not abbreviated. +Here the first value @code{MON_1} also corresponds to January. +@item AM_STR +@itemx PM_STR +The return values are strings which can be used in the representation of time +as an hour from 1 to 12 plus an am/pm specifier. + +Note that in locales which do not use this time representation +these strings might be empty, in which case the am/pm format +cannot be used at all. +@item D_T_FMT +The return value can be used as a format string for @code{strftime} to +represent time and date in a locale-specific way. +@item D_FMT +The return value can be used as a format string for @code{strftime} to +represent a date in a locale-specific way. +@item T_FMT +The return value can be used as a format string for @code{strftime} to +represent time in a locale-specific way. +@item T_FMT_AMPM +The return value can be used as a format string for @code{strftime} to +represent time in the am/pm format. + +Note that if the am/pm format does not make any sense for the +selected locale, the return value might be the same as the one for +@code{T_FMT}. +@item ERA +The return value represents the era used in the current locale. + +Most locales do not define this value.
An example of a locale which +does define this value is the Japanese one. In Japan, the traditional +representation of dates includes the name of the era corresponding to +the then-emperor's reign. + +Normally it should not be necessary to use this value directly. +Specifying the @code{E} modifier in their format strings causes the +@code{strftime} functions to use this information. The format of the +returned string is not specified, and therefore you should not assume +knowledge of it on different systems. +@item ERA_YEAR +The return value gives the year in the relevant era of the locale. +As for @code{ERA} it should not be necessary to use this value directly. +@item ERA_D_T_FMT +This return value can be used as a format string for @code{strftime} to +represent dates and times in a locale-specific era-based way. +@item ERA_D_FMT +This return value can be used as a format string for @code{strftime} to +represent a date in a locale-specific era-based way. +@item ERA_T_FMT +This return value can be used as a format string for @code{strftime} to +represent time in a locale-specific era-based way. +@item ALT_DIGITS +The return value is a representation of up to @math{100} values used to +represent the values @math{0} to @math{99}. As for @code{ERA} this +value is not intended to be used directly, but instead indirectly +through the @code{strftime} function. When the modifier @code{O} is +used in a format which would otherwise use numerals to represent hours, +minutes, seconds, weekdays, months, or weeks, the appropriate value for +the locale is used instead. +@item INT_CURR_SYMBOL +The same as the value returned by @code{localeconv} in the +@code{int_curr_symbol} element of the @code{struct lconv}. +@item CURRENCY_SYMBOL +@itemx CRNCYSTR +The same as the value returned by @code{localeconv} in the +@code{currency_symbol} element of the @code{struct lconv}. + +@code{CRNCYSTR} is a deprecated alias still required by Unix98. 
+@item MON_DECIMAL_POINT +The same as the value returned by @code{localeconv} in the +@code{mon_decimal_point} element of the @code{struct lconv}. +@item MON_THOUSANDS_SEP +The same as the value returned by @code{localeconv} in the +@code{mon_thousands_sep} element of the @code{struct lconv}. +@item MON_GROUPING +The same as the value returned by @code{localeconv} in the +@code{mon_grouping} element of the @code{struct lconv}. +@item POSITIVE_SIGN +The same as the value returned by @code{localeconv} in the +@code{positive_sign} element of the @code{struct lconv}. +@item NEGATIVE_SIGN +The same as the value returned by @code{localeconv} in the +@code{negative_sign} element of the @code{struct lconv}. +@item INT_FRAC_DIGITS +The same as the value returned by @code{localeconv} in the +@code{int_frac_digits} element of the @code{struct lconv}. +@item FRAC_DIGITS +The same as the value returned by @code{localeconv} in the +@code{frac_digits} element of the @code{struct lconv}. +@item P_CS_PRECEDES +The same as the value returned by @code{localeconv} in the +@code{p_cs_precedes} element of the @code{struct lconv}. +@item P_SEP_BY_SPACE +The same as the value returned by @code{localeconv} in the +@code{p_sep_by_space} element of the @code{struct lconv}. +@item N_CS_PRECEDES +The same as the value returned by @code{localeconv} in the +@code{n_cs_precedes} element of the @code{struct lconv}. +@item N_SEP_BY_SPACE +The same as the value returned by @code{localeconv} in the +@code{n_sep_by_space} element of the @code{struct lconv}. +@item P_SIGN_POSN +The same as the value returned by @code{localeconv} in the +@code{p_sign_posn} element of the @code{struct lconv}. +@item N_SIGN_POSN +The same as the value returned by @code{localeconv} in the +@code{n_sign_posn} element of the @code{struct lconv}. + +@item INT_P_CS_PRECEDES +The same as the value returned by @code{localeconv} in the +@code{int_p_cs_precedes} element of the @code{struct lconv}. 
+@item INT_P_SEP_BY_SPACE +The same as the value returned by @code{localeconv} in the +@code{int_p_sep_by_space} element of the @code{struct lconv}. +@item INT_N_CS_PRECEDES +The same as the value returned by @code{localeconv} in the +@code{int_n_cs_precedes} element of the @code{struct lconv}. +@item INT_N_SEP_BY_SPACE +The same as the value returned by @code{localeconv} in the +@code{int_n_sep_by_space} element of the @code{struct lconv}. +@item INT_P_SIGN_POSN +The same as the value returned by @code{localeconv} in the +@code{int_p_sign_posn} element of the @code{struct lconv}. +@item INT_N_SIGN_POSN +The same as the value returned by @code{localeconv} in the +@code{int_n_sign_posn} element of the @code{struct lconv}. + +@item DECIMAL_POINT +@itemx RADIXCHAR +The same as the value returned by @code{localeconv} in the +@code{decimal_point} element of the @code{struct lconv}. + +The name @code{RADIXCHAR} is a deprecated alias still used in Unix98. +@item THOUSANDS_SEP +@itemx THOUSEP +The same as the value returned by @code{localeconv} in the +@code{thousands_sep} element of the @code{struct lconv}. + +The name @code{THOUSEP} is a deprecated alias still used in Unix98. +@item GROUPING +The same as the value returned by @code{localeconv} in the +@code{grouping} element of the @code{struct lconv}. +@item YESEXPR +The return value is a regular expression which can be used with the +@code{regex} function to recognize a positive response to a yes/no +question. @Theglibc{} provides the @code{rpmatch} function for +easier handling in applications. +@item NOEXPR +The return value is a regular expression which can be used with the +@code{regex} function to recognize a negative response to a yes/no +question. +@item YESSTR +The return value is a locale-specific translation of the positive response +to a yes/no question. 
+ +Using this value is deprecated since it is a very special case of +message translation, and is better handled by the message +translation functions (@pxref{Message Translation}). + +The use of this symbol is deprecated. Instead message translation +should be used. +@item NOSTR +The return value is a locale-specific translation of the negative response +to a yes/no question. What is said for @code{YESSTR} is also true here. + +The use of this symbol is deprecated. Instead message translation +should be used. +@end vtable + +The file @file{langinfo.h} defines a lot more symbols but none of them +are official. Using them is not portable, and the format of the +return values might change. Therefore we recommend that you not use +them. + +Note that the return value for any valid argument can be used +in all situations (with the possible exception of the am/pm time formatting +codes). If the user has not selected any locale for the +appropriate category, @code{nl_langinfo} returns the information from the +@code{"C"} locale. It is therefore possible to use this function as +shown in the example below. + +If the argument @var{item} is not valid, a pointer to an empty string is +returned. +@end deftypefun + +An example of @code{nl_langinfo} usage is a function which has to +print a given date and time in a locale-specific way. At first one +might think that, since @code{strftime} internally uses the locale +information, writing something like the following is enough: + +@smallexample +size_t +i18n_time_n_data (char *s, size_t len, const struct tm *tp) +@{ + return strftime (s, len, "%X %D", tp); +@} +@end smallexample + +The format contains no weekday or month names and therefore is +internationally usable. Wrong! The output produced is something like +@code{"hh:mm:ss MM/DD/YY"}. This format is only recognizable in the +USA. Other countries use different formats.
Therefore the function +should be rewritten like this: + +@smallexample +size_t +i18n_time_n_data (char *s, size_t len, const struct tm *tp) +@{ + return strftime (s, len, nl_langinfo (D_T_FMT), tp); +@} +@end smallexample + +Now it uses the date and time format of the locale +selected when the program runs. If the user selects the locale +correctly there should never be a misunderstanding over the time and +date format. + +@node Formatting Numbers, Yes-or-No Questions, Locale Information, Locales +@section A dedicated function to format numbers + +We have seen that the structure returned by @code{localeconv} as well as +the values given to @code{nl_langinfo} allow you to retrieve the various +pieces of locale-specific information to format numbers and monetary +amounts. We have also seen that the underlying rules are quite complex. + +Therefore the X/Open standards introduce a function which uses such +locale information, making it easier for the user to format +numbers according to these rules. + +@deftypefun ssize_t strfmon (char *@var{s}, size_t @var{maxsize}, const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c It (and strfmon_l) both call vstrfmon_l, which, besides accessing the +@c locale object passed to it, accesses the active locale through +@c isdigit (but to_digit assumes ASCII digits only). It may call +@c __printf_fp (@mtslocale @ascuheap @acsmem) and guess_grouping (safe). +The @code{strfmon} function is similar to the @code{strftime} function +in that it takes a buffer, its size, a format string, +and values to write into the buffer as text in a form specified +by the format string. Like @code{strftime}, the function +also returns the number of bytes written into the buffer. + +There are two differences: @code{strfmon} can take more than one +argument, and, of course, the format specification is different. 
Like +@code{strftime}, the format string consists of normal text, which is +output as is, and format specifiers, which are indicated by a @samp{%}. +Immediately after the @samp{%}, you can optionally specify various flags +and formatting information before the main formatting character, in a +similar way to @code{printf}: + +@itemize @bullet +@item +Immediately following the @samp{%} there can be one or more of the +following flags: +@table @asis +@item @samp{=@var{f}} +The single byte character @var{f} is used for this field as the numeric +fill character. By default this character is a space character. +Filling with this character is only performed if a left precision +is specified. It is not just to fill to the given field width. +@item @samp{^} +The number is printed without grouping the digits according to the rules +of the current locale. By default grouping is enabled. +@item @samp{+}, @samp{(} +At most one of these flags can be used. They select which format to +represent the sign of a currency amount. By default, and if +@samp{+} is given, the locale equivalent of @math{+}/@math{-} is used. If +@samp{(} is given, negative amounts are enclosed in parentheses. The +exact format is determined by the values of the @code{LC_MONETARY} +category of the locale selected at program runtime. +@item @samp{!} +The output will not contain the currency symbol. +@item @samp{-} +The output will be formatted left-justified instead of right-justified if +it does not fill the entire field width. +@end table +@end itemize + +The next part of the specification is an optional field width. If no +width is specified @math{0} is taken. During output, the function first +determines how much space is required. If it requires at least as many +characters as given by the field width, it is output using as much space +as necessary. Otherwise, it is extended to use the full width by +filling with the space character. 
The presence or absence of the +@samp{-} flag determines the side at which such padding occurs. If +present, the spaces are added at the right making the output +left-justified, and vice versa. + +So far the format looks familiar, being similar to the @code{printf} and +@code{strftime} formats. However, the next two optional fields +introduce something new. The first one is a @samp{#} character followed +by a decimal digit string. The value of the digit string specifies the +number of @emph{digit} positions to the left of the decimal point (or +equivalent). This does @emph{not} include the grouping character when +the @samp{^} flag is not given. If the space needed to print the number +does not fill the whole width, the field is padded at the left side with +the fill character, which can be selected using the @samp{=} flag and by +default is a space. For example, if the field width is selected as 6, +the number is @math{123}, and the fill character is @samp{*}, the result +will be @samp{***123}. + +The second optional field starts with a @samp{.} (period) and consists +of another decimal digit string. Its value describes the number of +characters printed after the decimal point. The default is selected +from the current locale (@code{frac_digits}, @code{int_frac_digits}, +@pxref{General Numeric}). If the exact representation needs more digits +than given by this precision, the displayed value is rounded. If the +number of fractional digits is selected to be zero, no decimal point is +printed. + +As a GNU extension, the @code{strfmon} implementation in @theglibc{} +allows an optional @samp{L} next as a format modifier. If this modifier +is given, the argument is expected to be a @code{long double} instead of +a @code{double} value. + +Finally, the last component is a format specifier. There are three +specifiers defined: + +@table @asis +@item @samp{i} +Use the locale's rules for formatting an international currency value.
+@item @samp{n} +Use the locale's rules for formatting a national currency value. +@item @samp{%} +Place a @samp{%} in the output. There must be no flag, width +specifier or modifier given, only @samp{%%} is allowed. +@end table + +As for @code{printf}, the function reads the format string +from left to right and uses the values passed to the function following +the format string. The values are expected to be either of type +@code{double} or @code{long double}, depending on the presence of the +modifier @samp{L}. The result is stored in the buffer pointed to by +@var{s}. At most @var{maxsize} characters are stored. + +The return value of the function is the number of characters stored in +@var{s}, not including the terminating null byte. If the number of +characters stored would exceed @var{maxsize}, the function returns +@math{-1} and the content of the buffer @var{s} is unspecified. In this +case @code{errno} is set to @code{E2BIG}. +@end deftypefun + +A few examples should make clear how the function works. It is +assumed that all the following pieces of code are executed in a program +which uses the USA locale (@code{en_US}). The simplest +form of the format is this: + +@smallexample +strfmon (buf, 100, "@@%n@@%n@@%n@@", 123.45, -567.89, 12345.678); +@end smallexample + +@noindent +The output produced is +@smallexample +"@@$123.45@@-$567.89@@$12,345.68@@" +@end smallexample + +We can notice several things here. First, the widths of the output +numbers are different. We have not specified a width in the format +string, and so this is no wonder. Second, the third number is printed +using thousands separators. The thousands separator for the +@code{en_US} locale is a comma. The number is also rounded. +@math{.678} is rounded to @math{.68} since the format does not specify a +precision and the default value in the locale is @math{2}. Finally, +note that the national currency symbol is printed since @samp{%n} was +used, not @samp{i}.
The next example shows how we can align the output. + +@smallexample +strfmon (buf, 100, "@@%=*11n@@%=*11n@@%=*11n@@", 123.45, -567.89, 12345.678); +@end smallexample + +@noindent +The output this time is: + +@smallexample +"@@ $123.45@@ -$567.89@@ $12,345.68@@" +@end smallexample + +Two things stand out. Firstly, all fields have the same width (eleven +characters) since this is the width given in the format and since no +number required more characters to be printed. The second important +point is that the fill character is not used. This is correct since the +white space was not used to achieve a precision given by a @samp{#} +modifier, but instead to fill to the given width. The difference +becomes obvious if we now add a left precision specification. + +@smallexample +strfmon (buf, 100, "@@%=*11#5n@@%=*11#5n@@%=*11#5n@@", + 123.45, -567.89, 12345.678); +@end smallexample + +@noindent +The output is + +@smallexample +"@@ $***123.45@@-$***567.89@@ $12,345.68@@" +@end smallexample + +Here we can see that all the currency symbols are now aligned, and that +the space between the currency sign and the number is filled with the +selected fill character. Note that although the left precision is selected to be +@math{5} and @math{123.45} has three digits left of the decimal point, +the space is filled with three asterisks. This is correct since, as +explained above, the left precision does not include the positions used to store +thousands separators. One last example should explain the remaining +functionality. + +@smallexample +strfmon (buf, 100, "@@%=0(16#5.3i@@%=0(16#5.3i@@%=0(16#5.3i@@", + 123.45, -567.89, 12345.678); +@end smallexample + +@noindent +This rather complex format string produces the following output: + +@smallexample +"@@ USD 000123.450 @@(USD 000567.890)@@ USD 12,345.678 @@" +@end smallexample + +The most noticeable change is the alternative way of representing +negative numbers.
In financial circles this is often done using +parentheses, and this is what the @samp{(} flag selected. The fill +character is now @samp{0}. Note that this @samp{0} character is not +regarded as a numeric zero, and therefore the first and second numbers +are not printed using a thousands separator. Since we used the format +specifier @samp{i} instead of @samp{n}, the international form of the +currency symbol is used. This is a four-character string, in this case +@code{"USD "}. The last point is that since the precision right of the +decimal point is selected to be three, the first and second numbers are +printed with an extra zero at the end and the third number is printed +without rounding. + +@node Yes-or-No Questions, , Formatting Numbers , Locales +@section Yes-or-No Questions + +Some non-GUI programs ask a yes-or-no question. If the messages +(especially the questions) are translated into foreign languages, be +sure that you localize the answers too. It would be a very bad habit to +ask a question in one language and request the answer in another, often +English. + +@Theglibc{} contains @code{rpmatch} to give applications easy +access to the corresponding locale definitions. + +@comment stdlib.h +@comment GNU +@deftypefun int rpmatch (const char *@var{response}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c Calls nl_langinfo with YESEXPR and NOEXPR, triggering @mtslocale but +@c it's regcomp and regexec that bring in all of the safety issues. +@c regfree is also called, but it doesn't introduce any further issues. +The function @code{rpmatch} checks the string in @var{response} for whether +or not it is a correct yes-or-no answer and if yes, which one. The +check uses the @code{YESEXPR} and @code{NOEXPR} data in the +@code{LC_MESSAGES} category of the currently selected locale.
The +return value is as follows: + +@table @code +@item 1 +The user entered an affirmative answer. + +@item 0 +The user entered a negative answer. + +@item -1 +The answer matched neither the @code{YESEXPR} nor the @code{NOEXPR} +regular expression. +@end table + +This function is not standardized, but besides @theglibc{} it is +available at least in the IBM AIX library. +@end deftypefun + +@noindent +This function would normally be used like this: + +@smallexample + @dots{} + /* @r{Use a safe default.} */ + _Bool doit = false; + + fputs (gettext ("Do you really want to do this? "), stdout); + fflush (stdout); + /* @r{Prepare the @code{getline} call.} */ + line = NULL; + len = 0; + while (getline (&line, &len, stdin) >= 0) + @{ + /* @r{Check the response.} */ + int res = rpmatch (line); + if (res >= 0) + @{ + /* @r{We got a definitive answer.} */ + if (res > 0) + doit = true; + break; + @} + @} + /* @r{Free what @code{getline} allocated.} */ + free (line); +@end smallexample + +Note that the loop continues until a read error is detected or until a +definitive (positive or negative) answer is read. diff --git a/REORG.TODO/manual/macros.texi b/REORG.TODO/manual/macros.texi new file mode 100644 index 0000000000..2e0990eac2 --- /dev/null +++ b/REORG.TODO/manual/macros.texi @@ -0,0 +1,277 @@ +@c Define common macros used to keep phrasing consistent in the manual. + +@ifclear MACROS +@set MACROS + +@c Names used to refer to the library, as noun phrases at the start or +@c not at the start of a sentence. +@macro Theglibc +The GNU C Library +@end macro +@macro theglibc +the GNU C Library +@end macro + +@c Name used to refer to the library as an adjective. +@macro glibcadj +GNU C Library +@end macro + +@c Description applying to all GNU systems; that is, used in +@c describing a property of a system such that no system without that +@c property would be considered a variant of the GNU system. +@macro gnusystems +GNU systems +@end macro + +@c Systems that are not GNU systems.
+@macro nongnusystems +non-GNU systems +@end macro + +@c Description applying to GNU/Linux and GNU/Hurd systems, but not +@c necessarily to other variants of the GNU system. +@macro gnulinuxhurdsystems +GNU/Linux and GNU/Hurd systems +@end macro + +@c Description applying to GNU/Hurd systems; that is, systems using the +@c GNU Hurd with the GNU C Library. +@macro gnuhurdsystems +GNU/Hurd systems +@end macro + +@c Description applying to GNU/Linux systems; that is, systems using +@c the Linux kernel with the GNU C Library. +@macro gnulinuxsystems +GNU/Linux systems +@end macro + +@c Document the safety functions as preliminary. It does NOT expand its +@c comments. +@macro prelim {comments} +Preliminary: + +@end macro +@c Document a function as thread safe. +@macro mtsafe {comments} +| MT-Safe \comments\ + +@end macro +@c Document a function as thread unsafe. +@macro mtunsafe {comments} +| MT-Unsafe \comments\ + +@end macro +@c Document a function as safe for use in asynchronous signal handlers. +@macro assafe {comments} +| AS-Safe \comments\ + +@end macro +@c Document a function as unsafe for use in asynchronous signal +@c handlers. This distinguishes unmarked functions, for which this +@c property has not been assessed, from those that have been analyzed. +@macro asunsafe {comments} +| AS-Unsafe \comments\ + +@end macro +@c Document a function as safe for use when asynchronous cancellation is +@c enabled. +@macro acsafe {comments} +| AC-Safe \comments\ + +@end macro +@c Document a function as unsafe for use when asynchronous cancellation +@c is enabled. This distinguishes unmarked functions, for which this +@c property has not been assessed, from those that have been analyzed. +@macro acunsafe {comments} +| AC-Unsafe \comments\ + +@end macro +@c Format safety properties without referencing the section of the +@c definitions. To be used in the definitions of the properties +@c themselves. 
+@macro sampsafety {notes} +@noindent +\notes\| + + +@end macro +@c Format the safety properties of a function. +@macro safety {notes} +\notes\| @xref{POSIX Safety Concepts}. + + +@end macro +@c Function is MT- and AS-Unsafe due to an internal race. +@macro mtasurace {comments} +race\comments\ +@end macro +@c Function is AS-Unsafe due to an internal race. +@macro asurace {comments} +race\comments\ +@end macro +@c Function is MT-Safe, but with potential race on user-supplied object +@c of opaque type. +@macro mtsrace {comments} +race\comments\ +@end macro +@c Function is MT- and AS-Unsafe for modifying an object that is decreed +@c MT-constant due to MT-Unsafe accesses elsewhere. +@macro mtasuconst {comments} +const\comments\ +@end macro +@c Function accesses the assumed-constant locale object. +@macro mtslocale {comments} +locale\comments\ +@end macro +@c Function accesses the assumed-constant environment. +@macro mtsenv {comments} +env\comments\ +@end macro +@c Function accesses the assumed-constant hostid. +@macro mtshostid {comments} +hostid\comments\ +@end macro +@c Function accesses the assumed-constant _sigintr variable. +@macro mtssigintr {comments} +sigintr\comments\ +@end macro +@c Function performs MT-Unsafe initialization at the first call. +@macro mtuinit {comments} +init\comments\ +@end macro +@c Function performs libc_once AS-Unsafe initialization. +@macro asuinit {comments} +init\comments\ +@end macro +@c Function performs libc_once AC-Unsafe initialization. +@macro acuinit {comments} +init\comments\ +@end macro +@c Function is AS-Unsafe because it takes a non-recursive mutex that may +@c already be held by the function interrupted by the signal. +@macro asulock {comments} +lock\comments\ +@end macro +@c Function is AC-Unsafe because it may fail to release a mutex. +@macro aculock {comments} +lock\comments\ +@end macro +@c Function is AS-Unsafe because some data structure may be inconsistent +@c due to an ongoing update interrupted by a signal.
+@macro asucorrupt {comments} +corrupt\comments\ +@end macro +@c Function is AC-Unsafe because some data structure may be left +@c inconsistent when cancelled. +@macro acucorrupt {comments} +corrupt\comments\ +@end macro +@c Function is AS- and AC-Unsafe because of malloc/free. +@macro ascuheap {comments} +heap\comments\ +@end macro +@c Function is AS-Unsafe because of malloc/free. +@macro asuheap {comments} +heap\comments\ +@end macro +@c Function is AS- and AC-Unsafe because of dlopen/dlclose. +@macro ascudlopen {comments} +dlopen\comments\ +@end macro +@c Function is AS- and AC-Unsafe because of unknown plugins. +@macro ascuplugin {comments} +plugin\comments\ +@end macro +@c Function is AS- and AC-Unsafe because of i18n. +@macro ascuintl {comments} +i18n\comments\ +@end macro +@c Function is AS-Unsafe because of i18n. +@macro asuintl {comments} +i18n\comments\ +@end macro +@c Function may leak file descriptors if async-cancelled. +@macro acsfd {comments} +fd\comments\ +@end macro +@c Function may leak memory if async-cancelled. +@macro acsmem {comments} +mem\comments\ +@end macro +@c Function is unsafe due to temporarily overriding a signal handler. +@macro mtascusig {comments} +sig\comments\ +@end macro +@c Function is MT- and AS-Unsafe due to temporarily changing attributes +@c of the controlling terminal. +@macro mtasuterm {comments} +term\comments\ +@end macro +@c Function is AC-Unsafe for failing to restore attributes of the +@c controlling terminal. +@macro acuterm {comments} +term\comments\ +@end macro +@c Function sets timers atomically. +@macro mtstimer {comments} +timer\comments\ +@end macro +@c Function sets and restores timers. +@macro mtascutimer {comments} +timer\comments\ +@end macro +@c Function temporarily changes the current working directory. +@macro mtasscwd {comments} +cwd\comments\ +@end macro +@c Function may fail to restore to the original current working +@c directory after temporarily changing it.
+@macro acscwd {comments} +cwd\comments\ +@end macro +@c Function is MT-Safe while POSIX says it needn't be MT-Safe. +@macro mtsposix {comments} +!posix\comments\ +@end macro +@c Function is MT-Unsafe while POSIX says it should be MT-Safe. +@macro mtuposix {comments} +!posix\comments\ +@end macro +@c Function is AS-Safe while POSIX says it needn't be AS-Safe. +@macro assposix {comments} +!posix\comments\ +@end macro +@c Function is AS-Unsafe while POSIX says it should be AS-Safe. +@macro asuposix {comments} +!posix\comments\ +@end macro +@c Function is AC-Safe while POSIX says it needn't be AC-Safe. +@macro acsposix {comments} +!posix\comments\ +@end macro +@c Function is AC-Unsafe while POSIX says it should be AC-Safe. +@macro acuposix {comments} +!posix\comments\ +@end macro + +@iftex +@macro twoexp{exp} +@math{2^{{\exp\}}} +@end macro +@end iftex +@ifnottex +@macro twoexp{exp} +2^\exp\ +@end macro +@end ifnottex + +@c Used by errlist.awk and errnos.awk to generate other files. +@c Note that error values have ABI implications for the Hurd. +@macro errno {err, val, str} +@cindex \str\ +``\str\.'' +@end macro + +@end ifclear diff --git a/REORG.TODO/manual/maint.texi b/REORG.TODO/manual/maint.texi new file mode 100644 index 0000000000..473ab162f0 --- /dev/null +++ b/REORG.TODO/manual/maint.texi @@ -0,0 +1,539 @@ +@node Maintenance, Platform, Installation, Top +@c %MENU% How to enhance and port the GNU C Library +@appendix Library Maintenance + +@menu +* Source Layout:: How to add new functions or header files + to the GNU C Library. +* Porting:: How to port the GNU C Library to + a new machine or operating system. +@end menu + +@node Source Layout +@appendixsec Adding New Functions + +The process of building the library is driven by the makefiles, which +make heavy use of special features of GNU @code{make}. The makefiles +are very complex, and you probably don't want to try to understand them. 
+But what they do is fairly straightforward, and only requires that you +define a few variables in the right places. + +The library sources are divided into subdirectories, grouped by topic. + +The @file{string} subdirectory has all the string-manipulation +functions, @file{math} has all the mathematical functions, etc. + +Each subdirectory contains a simple makefile, called @file{Makefile}, +which defines a few @code{make} variables and then includes the global +makefile @file{Rules} with a line like: + +@smallexample +include ../Rules +@end smallexample + +@noindent +The basic variables that a subdirectory makefile defines are: + +@table @code +@item subdir +The name of the subdirectory, for example @file{stdio}. +This variable @strong{must} be defined. + +@item headers +The names of the header files in this section of the library, +such as @file{stdio.h}. + +@item routines +@itemx aux +The names of the modules (source files) in this section of the library. +These should be simple names, such as @samp{strlen} (rather than +complete file names, such as @file{strlen.c}). Use @code{routines} for +modules that define functions in the library, and @code{aux} for +auxiliary modules containing things like data definitions. But the +values of @code{routines} and @code{aux} are just concatenated, so there +really is no practical difference.@refill + +@item tests +The names of test programs for this section of the library. These +should be simple names, such as @samp{tester} (rather than complete file +names, such as @file{tester.c}). @w{@samp{make tests}} will build and +run all the test programs. If a test program needs input, put the test +data in a file called @file{@var{test-program}.input}; it will be given to +the test program on its standard input. If a test program wants to be +run with arguments, put the arguments (all on a single line) in a file +called @file{@var{test-program}.args}. 
Test programs should exit with +zero status when the test passes, and nonzero status when the test +indicates a bug in the library or error in building. + +@item others +The names of ``other'' programs associated with this section of the +library. These are programs which are not tests per se, but are other +small programs included with the library. They are built by +@w{@samp{make others}}.@refill + +@item install-lib +@itemx install-data +@itemx install +Files to be installed by @w{@samp{make install}}. Files listed in +@samp{install-lib} are installed in the directory specified by +@samp{libdir} in @file{configparms} or @file{Makeconfig} +(@pxref{Installation}). Files listed in @code{install-data} are +installed in the directory specified by @samp{datadir} in +@file{configparms} or @file{Makeconfig}. Files listed in @code{install} +are installed in the directory specified by @samp{bindir} in +@file{configparms} or @file{Makeconfig}.@refill + +@item distribute +Other files from this subdirectory which should be put into a +distribution tar file. You need not list here the makefile itself or +the source and header files listed in the other standard variables. +Only define @code{distribute} if there are files used in an unusual way +that should go into the distribution. + +@item generated +Files which are generated by @file{Makefile} in this subdirectory. +These files will be removed by @w{@samp{make clean}}, and they will +never go into a distribution. + +@item extra-objs +Extra object files which are built by @file{Makefile} in this +subdirectory. This should be a list of file names like @file{foo.o}; +the files will actually be found in whatever directory object files are +being built in. These files will be removed by @w{@samp{make clean}}. +This variable is used for secondary object files needed to build +@code{others} or @code{tests}. +@end table + +@menu +* Platform: Adding Platform-specific. Adding platform-specific + features. 
+@end menu + +@node Adding Platform-specific +@appendixsubsec Platform-specific types, macros and functions + +It's sometimes necessary to provide nonstandard, platform-specific +features to developers. The C library is traditionally the +lowest library layer, so it makes sense for it to provide these +low-level features. However, including these features in the C +library may be a disadvantage if another package provides them +as well, since there will then be two conflicting versions of them. Also, +the features won't be available to projects that do not use +@theglibc{} but use other GNU tools, like GCC. + +The current guidelines are: +@itemize @bullet +@item +If the header file provides features that only make sense on a particular +machine architecture and have nothing to do with an operating system, then +the features should ultimately be provided as GCC built-in functions. Until +then, @theglibc{} may provide them in the header file. When the GCC built-in +functions become available, those provided in the header file should be made +conditionally available prior to the GCC version in which the built-in +function was made available. + +@item +If the header file provides features that are specific to an operating system, +both GCC and @theglibc{} could provide them, but @theglibc{} is preferred +as it already has a lot of information about the operating system. + +@item +If the header file provides features that are specific to an operating system +but used by @theglibc{}, then @theglibc{} should provide them. +@end itemize + +The general solution for providing low-level features is to export them as +follows: + +@itemize @bullet +@item +A nonstandard, low-level header file that defines macros and inline +functions should be called @file{sys/platform/@var{name}.h}. + +@item +Each header file's name should include the platform name, to avoid +users thinking there is anything in common between the different +header files for different platforms.
For example, a +@file{sys/platform/@var{arch}.h} name such as +@file{sys/platform/ppc.h} is better than @file{sys/platform.h}. + +@item +A platform-specific header file provided by @theglibc{} should coordinate +with GCC such that compiler built-in versions of the functions and macros are +preferred if available. This means that user programs will only ever need to +include @file{sys/platform/@var{arch}.h}, keeping the same names of types, +macros, and functions for convenience and portability. + +@item +Each included symbol must have the prefix @code{__@var{arch}_}, such as +@code{__ppc_get_timebase}. +@end itemize + + +The easiest way to provide a header file is to add it to the +@code{sysdep_headers} variable. For example, the combination of +Linux-specific header files on PowerPC could be provided like this: + +@smallexample +sysdep_headers += sys/platform/ppc.h +@end smallexample + +Then ensure that you have added a @file{sys/platform/ppc.h} +header file in the machine-specific directory, e.g., +@file{sysdeps/powerpc/sys/platform/ppc.h}. + + +@node Porting +@appendixsec Porting @theglibc{} + +@Theglibc{} is written to be easily portable to a variety of +machines and operating systems. Machine- and operating system-dependent +functions are well separated to make it easy to add implementations for +new machines or operating systems. This section describes the layout of +the library source tree and explains the mechanisms used to select +machine-dependent code to use. + +All the machine-dependent and operating system-dependent files in the +library are in the subdirectory @file{sysdeps} under the top-level +library source directory. This directory contains a hierarchy of +subdirectories (@pxref{Hierarchy Conventions}). 
+ +Each subdirectory of @file{sysdeps} contains source files for a +particular machine or operating system, or for a class of machine or +operating system (for example, systems by a particular vendor, or all +machines that use IEEE 754 floating-point format). A configuration +specifies an ordered list of these subdirectories. Each subdirectory +implicitly appends its parent directory to the list. For example, +specifying the list @file{unix/bsd/vax} is equivalent to specifying the +list @file{unix/bsd/vax unix/bsd unix}. A subdirectory can also specify +that it implies other subdirectories which are not directly above it in +the directory hierarchy. If the file @file{Implies} exists in a +subdirectory, it lists other subdirectories of @file{sysdeps} which are +appended to the list, appearing after the subdirectory containing the +@file{Implies} file. Lines in an @file{Implies} file that begin with a +@samp{#} character are ignored as comments. For example, +@file{unix/bsd/Implies} contains:@refill +@smallexample +# BSD has Internet-related things. +unix/inet +@end smallexample +@noindent +and @file{unix/Implies} contains: +@need 300 +@smallexample +posix +@end smallexample + +@noindent +So the final list is @file{unix/bsd/vax unix/bsd unix/inet unix posix}. + +@file{sysdeps} has a ``special'' subdirectory called @file{generic}. It +is always implicitly appended to the list of subdirectories, so you +needn't put it in an @file{Implies} file, and you should not create any +subdirectories under it intended to be new specific categories. +@file{generic} serves two purposes. First, the makefiles do not bother +to look for a system-dependent version of a file that's not in +@file{generic}. This means that any system-dependent source file must +have an analogue in @file{generic}, even if the routines defined by that +file are not implemented on other platforms. 
Second, the @file{generic} +version of a system-dependent file is used if the makefiles do not find +a version specific to the system you're compiling for. + +If it is possible to implement the routines in a @file{generic} file in +machine-independent C, using only other machine-independent functions in +the C library, then you should do so. Otherwise, make them stubs. A +@dfn{stub} function is a function which cannot be implemented on a +particular machine or operating system. Stub functions always return an +error, and set @code{errno} to @code{ENOSYS} (Function not implemented). +@xref{Error Reporting}. If you define a stub function, you must place +the statement @code{stub_warning(@var{function})}, where @var{function} +is the name of your function, after its definition. This causes the +function to be listed in the installed @code{<gnu/stubs.h>}, and +makes GNU ld warn when the function is used. + +Some rare functions are only useful on specific systems and aren't +defined at all on others; these do not appear anywhere in the +system-independent source code or makefiles (including the +@file{generic} directory), only in the system-dependent @file{Makefile} +in the specific system's subdirectory. + +If you come across a file that is in one of the main source directories +(@file{string}, @file{stdio}, etc.), and you want to write a machine- or +operating system-dependent version of it, move the file into +@file{sysdeps/generic} and write your new implementation in the +appropriate system-specific subdirectory. Note that if a file is to be +system-dependent, it @strong{must not} appear in one of the main source +directories.@refill + +There are a few special files that may exist in each subdirectory of +@file{sysdeps}: + +@comment Blank lines after items make the table look better. +@table @file +@item Makefile + +A makefile for this machine or operating system, or class of machine or +operating system. 
This file is included by the library makefile +@file{Makerules}, which is used by the top-level makefile and the +subdirectory makefiles. It can change the variables set in the +including makefile or add new rules. It can use GNU @code{make} +conditional directives based on the variable @samp{subdir} (see above) to +select different sets of variables and rules for different sections of +the library. It can also set the @code{make} variable +@samp{sysdep-routines}, to specify extra modules to be included in the +library. You should use @samp{sysdep-routines} rather than adding +modules to @samp{routines} because the latter is used in determining +what to distribute for each subdirectory of the main source tree.@refill + +Each makefile in a subdirectory in the ordered list of subdirectories to +be searched is included in order. Since several system-dependent +makefiles may be included, each should append to @samp{sysdep-routines} +rather than simply setting it: + +@smallexample +sysdep-routines := $(sysdep-routines) foo bar +@end smallexample + +@need 1000 +@item Subdirs + +This file contains the names of new whole subdirectories under the +top-level library source tree that should be included for this system. +These subdirectories are treated just like the system-independent +subdirectories in the library source tree, such as @file{stdio} and +@file{math}. + +Use this when there are completely new sets of functions and header +files that should go into the library for the system this subdirectory +of @file{sysdeps} implements. For example, +@file{sysdeps/unix/inet/Subdirs} contains @file{inet}; the @file{inet} +directory contains various network-oriented operations which only make +sense to put in the library on systems that support the Internet.@refill + +@item configure + +This file is a shell script fragment to be run at configuration time. 
+The top-level @file{configure} script uses the shell @code{.} command to +read the @file{configure} file in each system-dependent directory +chosen, in order. The @file{configure} files are often generated from +@file{configure.ac} files using Autoconf. + +A system-dependent @file{configure} script will usually add things to +the shell variables @samp{DEFS} and @samp{config_vars}; see the +top-level @file{configure} script for details. The script can check for +@w{@samp{--with-@var{package}}} options that were passed to the +top-level @file{configure}. For an option +@w{@samp{--with-@var{package}=@var{value}}} @file{configure} sets the +shell variable @w{@samp{with_@var{package}}} (with any dashes in +@var{package} converted to underscores) to @var{value}; if the option is +just @w{@samp{--with-@var{package}}} (no argument), then it sets +@w{@samp{with_@var{package}}} to @samp{yes}. + +@item configure.ac + +This file is an Autoconf input fragment to be processed into the file +@file{configure} in this subdirectory. @xref{Introduction,,, +autoconf.info, Autoconf: Generating Automatic Configuration Scripts}, +for a description of Autoconf. You should write either @file{configure} +or @file{configure.ac}, but not both. The first line of +@file{configure.ac} should invoke the @code{m4} macro +@samp{GLIBC_PROVIDES}. This macro does several @code{AC_PROVIDE} calls +for Autoconf macros which are used by the top-level @file{configure} +script; without this, those macros might be invoked again unnecessarily +by Autoconf. +@end table + +That is the general system for how system-dependencies are isolated. +@iftex +The next section explains how to decide what directories in +@file{sysdeps} to use. @ref{Porting to Unix}, has some tips on porting +the library to Unix variants. +@end iftex + +@menu +* Hierarchy Conventions:: The layout of the @file{sysdeps} hierarchy. +* Porting to Unix:: Porting the library to an average + Unix-like system. 
+@end menu + +@node Hierarchy Conventions +@appendixsubsec Layout of the @file{sysdeps} Directory Hierarchy + +A GNU configuration name has three parts: the CPU type, the +manufacturer's name, and the operating system. @file{configure} uses +these to pick the list of system-dependent directories to look for. If +the @samp{--nfp} option is @emph{not} passed to @file{configure}, the +directory @file{@var{machine}/fpu} is also used. The operating system +often has a @dfn{base operating system}; for example, if the operating +system is @samp{Linux}, the base operating system is @samp{unix/sysv}. +The algorithm used to pick the list of directories is simple: +@file{configure} makes a list of the base operating system, +manufacturer, CPU type, and operating system, in that order. It then +concatenates all these together with slashes in between, to produce a +directory name; for example, the configuration @w{@samp{i686-linux-gnu}} +results in @file{unix/sysv/linux/i386/i686}. @file{configure} then +tries removing each element of the list in turn, so +@file{unix/sysv/linux} and @file{unix/sysv} are also tried, among others. +Since the precise version number of the operating system is often not +important, and it would be very inconvenient, for example, to have +identical @file{irix6.2} and @file{irix6.3} directories, +@file{configure} tries successively less specific operating system names +by removing trailing suffixes starting with a period. 
+ +As an example, here is the complete list of directories that would be +tried for the configuration @w{@samp{i686-linux-gnu}} (with the +@file{crypt} and @file{linuxthreads} add-on): + +@smallexample +sysdeps/i386/elf +crypt/sysdeps/unix +linuxthreads/sysdeps/unix/sysv/linux +linuxthreads/sysdeps/pthread +linuxthreads/sysdeps/unix/sysv +linuxthreads/sysdeps/unix +linuxthreads/sysdeps/i386/i686 +linuxthreads/sysdeps/i386 +linuxthreads/sysdeps/pthread/no-cmpxchg +sysdeps/unix/sysv/linux/i386 +sysdeps/unix/sysv/linux +sysdeps/gnu +sysdeps/unix/common +sysdeps/unix/mman +sysdeps/unix/inet +sysdeps/unix/sysv/i386/i686 +sysdeps/unix/sysv/i386 +sysdeps/unix/sysv +sysdeps/unix/i386 +sysdeps/unix +sysdeps/posix +sysdeps/i386/i686 +sysdeps/i386/i486 +sysdeps/libm-i387/i686 +sysdeps/i386/fpu +sysdeps/libm-i387 +sysdeps/i386 +sysdeps/wordsize-32 +sysdeps/ieee754 +sysdeps/libm-ieee754 +sysdeps/generic +@end smallexample + +Different machine architectures are conventionally subdirectories at the +top level of the @file{sysdeps} directory tree. For example, +@w{@file{sysdeps/sparc}} and @w{@file{sysdeps/m68k}}. These contain +files specific to those machine architectures, but not specific to any +particular operating system. There might be subdirectories for +specializations of those architectures, such as +@w{@file{sysdeps/m68k/68020}}. Code which is specific to the +floating-point coprocessor used with a particular machine should go in +@w{@file{sysdeps/@var{machine}/fpu}}. + +There are a few directories at the top level of the @file{sysdeps} +hierarchy that are not for particular machine architectures. + +@table @file +@item generic +As described above (@pxref{Porting}), this is the subdirectory +that every configuration implicitly uses after all others. + +@item ieee754 +This directory is for code using the IEEE 754 floating-point format, +where the C type @code{float} is IEEE 754 single-precision format, and +@code{double} is IEEE 754 double-precision format. 
Usually this +directory is referred to in the @file{Implies} file in a machine +architecture-specific directory, such as @file{m68k/Implies}. + +@item libm-ieee754 +This directory contains an implementation of a mathematical library +usable on platforms which use @w{IEEE 754} conformant floating-point +arithmetic. + +@item libm-i387 +This is a special case. Ideally the code should be in +@file{sysdeps/i386/fpu} but for various reasons it is kept aside. + +@item posix +This directory contains implementations of things in the library in +terms of @sc{POSIX.1} functions. This includes some of the @sc{POSIX.1} +functions themselves. Of course, @sc{POSIX.1} cannot be completely +implemented in terms of itself, so a configuration using just +@file{posix} cannot be complete. + +@item unix +This is the directory for Unix-like things. @xref{Porting to Unix}. +@file{unix} implies @file{posix}. There are some special-purpose +subdirectories of @file{unix}: + +@table @file +@item unix/common +This directory is for things common to both BSD and System V release 4. +Both @file{unix/bsd} and @file{unix/sysv/sysv4} imply @file{unix/common}. + +@item unix/inet +This directory is for @code{socket} and related functions on Unix systems. +@file{unix/inet/Subdirs} enables the @file{inet} top-level subdirectory. +@file{unix/common} implies @file{unix/inet}. +@end table + +@item mach +This is the directory for things based on the Mach microkernel from CMU +(including @gnuhurdsystems{}). Other basic operating systems +(VMS, for example) would have their own directories at the top level of +the @file{sysdeps} hierarchy, parallel to @file{unix} and @file{mach}. +@end table + +@node Porting to Unix +@appendixsubsec Porting @theglibc{} to Unix Systems + +Most Unix systems are fundamentally very similar. There are variations +between different machines, and variations in what facilities are +provided by the kernel. 
But the interface to the operating system +facilities is, for the most part, pretty uniform and simple. + +The code for Unix systems is in the directory @file{unix}, at the top +level of the @file{sysdeps} hierarchy. This directory contains +subdirectories (and subdirectory trees) for various Unix variants. + +The functions which are system calls in most Unix systems are +implemented in assembly code, which is generated automatically from +specifications in files named @file{syscalls.list}. There are several +such files, one in @file{sysdeps/unix} and others in its subdirectories. +Some special system calls are implemented in files that are named with a +suffix of @samp{.S}; for example, @file{_exit.S}. Files ending in +@samp{.S} are run through the C preprocessor before being fed to the +assembler. + +These files all use a set of macros that should be defined in +@file{sysdep.h}. The @file{sysdep.h} file in @file{sysdeps/unix} +partially defines them; a @file{sysdep.h} file in another directory must +finish defining them for the particular machine and operating system +variant. See @file{sysdeps/unix/sysdep.h} and the machine-specific +@file{sysdep.h} implementations to see what these macros are and what +they should do.@refill + +The system-specific makefile for the @file{unix} directory +(@file{sysdeps/unix/Makefile}) gives rules to generate several files +from the Unix system you are building the library on (which is assumed +to be the target system you are building the library @emph{for}). All +the generated files are put in the directory where the object files are +kept; they should not affect the source tree itself. The files +generated are @file{ioctls.h}, @file{errnos.h}, @file{sys/param.h}, and +@file{errlist.c} (for the @file{stdio} section of the library). + +@ignore +@c This section might be a good idea if it is finished, +@c but there's no point including it as it stands. --rms +@c @appendixsec Compatibility with Traditional C + +@c ??? 
This section is really short now. Want to keep it? --roland + +@c It's not anymore true. glibc 2.1 cannot be used with K&R compilers. +@c --drepper + +Although @theglibc{} implements the @w{ISO C} library facilities, you +@emph{can} use @theglibc{} with traditional, ``pre-ISO'' C +compilers. However, you need to be careful because the content and +organization of the @glibcadj{} header files differs from that of +traditional C implementations. This means you may need to make changes +to your program in order to get it to compile. +@end ignore diff --git a/REORG.TODO/manual/math.texi b/REORG.TODO/manual/math.texi new file mode 100644 index 0000000000..69a0acec9b --- /dev/null +++ b/REORG.TODO/manual/math.texi @@ -0,0 +1,2078 @@ +@c We need some definitions here. +@ifclear mult +@ifhtml +@set mult · +@set infty ∞ +@set pie π +@end ifhtml +@iftex +@set mult @cdot +@set infty @infty +@end iftex +@ifclear mult +@set mult * +@set infty oo +@set pie pi +@end ifclear +@macro mul +@value{mult} +@end macro +@macro infinity +@value{infty} +@end macro +@ifnottex +@macro pi +@value{pie} +@end macro +@end ifnottex +@end ifclear + +@node Mathematics, Arithmetic, Syslog, Top +@c %MENU% Math functions, useful constants, random numbers +@chapter Mathematics + +This chapter contains information about functions for performing +mathematical computations, such as trigonometric functions. Most of +these functions have prototypes declared in the header file +@file{math.h}. The complex-valued functions are defined in +@file{complex.h}. +@pindex math.h +@pindex complex.h + +All mathematical functions which take a floating-point argument +have three variants, one each for @code{double}, @code{float}, and +@code{long double} arguments. The @code{double} versions are mostly +defined in @w{ISO C89}. The @code{float} and @code{long double} +versions are from the numeric extensions to C included in @w{ISO C99}. + +Which of the three versions of a function should be used depends on the +situation. 
For most calculations, the @code{float} functions are the +fastest. On the other hand, the @code{long double} functions have the +highest precision. @code{double} is somewhere in between. It is +usually wise to pick the narrowest type that can accommodate your data. +Not all machines have a distinct @code{long double} type; it may be the +same as @code{double}. + +@menu +* Mathematical Constants:: Precise numeric values for often-used + constants. +* Trig Functions:: Sine, cosine, tangent, and friends. +* Inverse Trig Functions:: Arcsine, arccosine, etc. +* Exponents and Logarithms:: Also pow and sqrt. +* Hyperbolic Functions:: sinh, cosh, tanh, etc. +* Special Functions:: Bessel, gamma, erf. +* Errors in Math Functions:: Known Maximum Errors in Math Functions. +* Pseudo-Random Numbers:: Functions for generating pseudo-random + numbers. +* FP Function Optimizations:: Fast code or small code. +@end menu + +@node Mathematical Constants +@section Predefined Mathematical Constants +@cindex constants +@cindex mathematical constants + +The header @file{math.h} defines several useful mathematical constants. +All values are defined as preprocessor macros starting with @code{M_}. +The values provided are: + +@vtable @code +@item M_E +The base of natural logarithms. +@item M_LOG2E +The logarithm to base @code{2} of @code{M_E}. +@item M_LOG10E +The logarithm to base @code{10} of @code{M_E}. +@item M_LN2 +The natural logarithm of @code{2}. +@item M_LN10 +The natural logarithm of @code{10}. +@item M_PI +Pi, the ratio of a circle's circumference to its diameter. +@item M_PI_2 +Pi divided by two. +@item M_PI_4 +Pi divided by four. +@item M_1_PI +The reciprocal of pi (1/pi) +@item M_2_PI +Two times the reciprocal of pi. +@item M_2_SQRTPI +Two times the reciprocal of the square root of pi. +@item M_SQRT2 +The square root of two. +@item M_SQRT1_2 +The reciprocal of the square root of two (also the square root of 1/2). 
+@end vtable + +These constants come from the Unix98 standard and were also available in +4.4BSD; therefore they are only defined if +@code{_XOPEN_SOURCE=500}, or a more general feature select macro, is +defined. The default set of features includes these constants. +@xref{Feature Test Macros}. + +All values are of type @code{double}. As an extension, @theglibc{} +also defines these constants with type @code{long double}. The +@code{long double} macros have a lowercase @samp{l} appended to their +names: @code{M_El}, @code{M_PIl}, and so forth. These are only +available if @code{_GNU_SOURCE} is defined. + +@vindex PI +@emph{Note:} Some programs use a constant named @code{PI} which has the +same value as @code{M_PI}. This constant is not standard; it may have +appeared in some old AT&T headers, and is mentioned in Stroustrup's book +on C++. It infringes on the user's name space, so @theglibc{} +does not define it. Fixing programs written to expect it is simple: +replace @code{PI} with @code{M_PI} throughout, or put @samp{-DPI=M_PI} +on the compiler command line. + +@node Trig Functions +@section Trigonometric Functions +@cindex trigonometric functions + +These are the familiar @code{sin}, @code{cos}, and @code{tan} functions. +The arguments to all of these functions are in units of radians; recall +that pi radians equals 180 degrees. + +@cindex pi (trigonometric constant) +The math library normally defines @code{M_PI} to a @code{double} +approximation of pi. If strict ISO and/or POSIX compliance +are requested this constant is not defined, but you can easily define it +yourself: + +@smallexample +#define M_PI 3.14159265358979323846264338327 +@end smallexample + +@noindent +You can also compute the value of pi with the expression @code{acos +(-1.0)}. 
+ +@comment math.h +@comment ISO +@deftypefun double sin (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float sinf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} sinl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the sine of @var{x}, where @var{x} is given in +radians. The return value is in the range @code{-1} to @code{1}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double cos (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float cosf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} cosl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the cosine of @var{x}, where @var{x} is given in +radians. The return value is in the range @code{-1} to @code{1}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double tan (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float tanf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} tanl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the tangent of @var{x}, where @var{x} is given in +radians. + +Mathematically, the tangent function has singularities at odd multiples +of pi/2. If the argument @var{x} is too close to one of these +singularities, @code{tan} will signal overflow. +@end deftypefun + +In many applications where @code{sin} and @code{cos} are used, the sine +and cosine of the same angle are needed at the same time. It is more +efficient to compute them simultaneously, so the library provides a +function to do that. 
+ +@comment math.h +@comment GNU +@deftypefun void sincos (double @var{x}, double *@var{sinx}, double *@var{cosx}) +@comment math.h +@comment GNU +@deftypefunx void sincosf (float @var{x}, float *@var{sinx}, float *@var{cosx}) +@comment math.h +@comment GNU +@deftypefunx void sincosl (long double @var{x}, long double *@var{sinx}, long double *@var{cosx}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the sine of @var{x} in @code{*@var{sinx}} and the +cosine of @var{x} in @code{*@var{cosx}}, where @var{x} is given in +radians. Both values, @code{*@var{sinx}} and @code{*@var{cosx}}, are in +the range of @code{-1} to @code{1}. + +This function is a GNU extension. Portable programs should be prepared +to cope with its absence. +@end deftypefun + +@cindex complex trigonometric functions + +@w{ISO C99} defines variants of the trig functions which work on +complex numbers. @Theglibc{} provides these functions, but they +are only useful if your compiler supports the new complex types defined +by the standard. +@c XXX Change this when gcc is fixed. -zw +(As of this writing GCC supports complex numbers, but there are bugs in +the implementation.) + +@comment complex.h +@comment ISO +@deftypefun {complex double} csin (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} csinf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} csinl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c There are calls to nan* that could trigger @mtslocale if they didn't get +@c empty strings. +These functions return the complex sine of @var{z}. +The mathematical definition of the complex sine is + +@ifnottex +@math{sin (z) = 1/(2*i) * (exp (z*i) - exp (-z*i))}. 
+@end ifnottex +@tex +$$\sin(z) = {1\over 2i} (e^{zi} - e^{-zi})$$ +@end tex +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} ccos (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} ccosf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} ccosl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the complex cosine of @var{z}. +The mathematical definition of the complex cosine is + +@ifnottex +@math{cos (z) = 1/2 * (exp (z*i) + exp (-z*i))} +@end ifnottex +@tex +$$\cos(z) = {1\over 2} (e^{zi} + e^{-zi})$$ +@end tex +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} ctan (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} ctanf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} ctanl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the complex tangent of @var{z}. +The mathematical definition of the complex tangent is + +@ifnottex +@math{tan (z) = -i * (exp (z*i) - exp (-z*i)) / (exp (z*i) + exp (-z*i))} +@end ifnottex +@tex +$$\tan(z) = -i \cdot {e^{zi} - e^{-zi}\over e^{zi} + e^{-zi}}$$ +@end tex + +@noindent +The complex tangent has poles at @math{pi/2 + n*pi}, where @math{n} is an +integer. @code{ctan} may signal overflow if @var{z} is too close to a +pole. +@end deftypefun + + +@node Inverse Trig Functions +@section Inverse Trigonometric Functions +@cindex inverse trigonometric functions + +These are the usual arcsine, arccosine and arctangent functions, +which are the inverses of the sine, cosine and tangent functions +respectively. 
+ +@comment math.h +@comment ISO +@deftypefun double asin (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float asinf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} asinl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the arcsine of @var{x}---that is, the value whose +sine is @var{x}. The value is in units of radians. Mathematically, +there are infinitely many such values; the one actually returned is the +one between @code{-pi/2} and @code{pi/2} (inclusive). + +The arcsine function is defined mathematically only +over the domain @code{-1} to @code{1}. If @var{x} is outside the +domain, @code{asin} signals a domain error. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double acos (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float acosf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} acosl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the arccosine of @var{x}---that is, the value +whose cosine is @var{x}. The value is in units of radians. +Mathematically, there are infinitely many such values; the one actually +returned is the one between @code{0} and @code{pi} (inclusive). + +The arccosine function is defined mathematically only +over the domain @code{-1} to @code{1}. If @var{x} is outside the +domain, @code{acos} signals a domain error. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double atan (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float atanf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} atanl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the arctangent of @var{x}---that is, the value +whose tangent is @var{x}. The value is in units of radians. 
+Mathematically, there are infinitely many such values; the one actually +returned is the one between @code{-pi/2} and @code{pi/2} (inclusive). +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double atan2 (double @var{y}, double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float atan2f (float @var{y}, float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} atan2l (long double @var{y}, long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function computes the arctangent of @var{y}/@var{x}, but the signs +of both arguments are used to determine the quadrant of the result, and +@var{x} is permitted to be zero. The return value is given in radians +and is in the range @code{-pi} to @code{pi}, inclusive. + +If @var{x} and @var{y} are coordinates of a point in the plane, +@code{atan2} returns the signed angle between the line from the origin +to that point and the x-axis. Thus, @code{atan2} is useful for +converting Cartesian coordinates to polar coordinates. (To compute the +radial coordinate, use @code{hypot}; see @ref{Exponents and +Logarithms}.) + +@c This is experimentally true. Should it be so? -zw +If both @var{x} and @var{y} are zero, @code{atan2} returns zero. +@end deftypefun + +@cindex inverse complex trigonometric functions +@w{ISO C99} defines complex versions of the inverse trig functions. + +@comment complex.h +@comment ISO +@deftypefun {complex double} casin (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} casinf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} casinl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the complex arcsine of @var{z}---that is, the +value whose sine is @var{z}. The value returned is in radians. + +Unlike the real-valued functions, @code{casin} is defined for all +values of @var{z}. 
+@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} cacos (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} cacosf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} cacosl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the complex arccosine of @var{z}---that is, the +value whose cosine is @var{z}. The value returned is in radians. + +Unlike the real-valued functions, @code{cacos} is defined for all +values of @var{z}. +@end deftypefun + + +@comment complex.h +@comment ISO +@deftypefun {complex double} catan (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} catanf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} catanl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the complex arctangent of @var{z}---that is, +the value whose tangent is @var{z}. The value is in units of radians. +@end deftypefun + + +@node Exponents and Logarithms +@section Exponentiation and Logarithms +@cindex exponentiation functions +@cindex power functions +@cindex logarithm functions + +@comment math.h +@comment ISO +@deftypefun double exp (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float expf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} expl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute @code{e} (the base of natural logarithms) raised +to the power @var{x}. + +If the magnitude of the result is too large to be representable, +@code{exp} signals overflow. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double exp2 (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float exp2f (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} exp2l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute @code{2} raised to the power @var{x}. +Mathematically, @code{exp2 (x)} is the same as @code{exp (x * log (2))}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double exp10 (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float exp10f (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} exp10l (long double @var{x}) +@comment math.h +@comment GNU +@deftypefunx double pow10 (double @var{x}) +@comment math.h +@comment GNU +@deftypefunx float pow10f (float @var{x}) +@comment math.h +@comment GNU +@deftypefunx {long double} pow10l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute @code{10} raised to the power @var{x}. +Mathematically, @code{exp10 (x)} is the same as @code{exp (x * log (10))}. + +The @code{exp10} functions are from TS 18661-4:2015; the @code{pow10} +names are GNU extensions. The name @code{exp10} is +preferred, since it is analogous to @code{exp} and @code{exp2}. +@end deftypefun + + +@comment math.h +@comment ISO +@deftypefun double log (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float logf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} logl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions compute the natural logarithm of @var{x}. @code{exp (log +(@var{x}))} equals @var{x}, exactly in mathematics and approximately in +C. + +If @var{x} is negative, @code{log} signals a domain error. If @var{x} +is zero, it returns negative infinity; if @var{x} is too close to zero, +it may signal overflow. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double log10 (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float log10f (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} log10l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the base-10 logarithm of @var{x}. +@code{log10 (@var{x})} equals @code{log (@var{x}) / log (10)}. + +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double log2 (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float log2f (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} log2l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the base-2 logarithm of @var{x}. +@code{log2 (@var{x})} equals @code{log (@var{x}) / log (2)}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double logb (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float logbf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} logbl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions extract the exponent of @var{x} and return it as a +floating-point value. If @code{FLT_RADIX} is two, @code{logb} is equal +to @code{floor (log2 (x))}, except it's probably faster. + +If @var{x} is de-normalized, @code{logb} returns the exponent @var{x} +would have if it were normalized. If @var{x} is infinity (positive or +negative), @code{logb} returns @math{@infinity{}}. If @var{x} is zero, +@code{logb} returns @math{-@infinity{}}. It does not signal. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun int ilogb (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx int ilogbf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx int ilogbl (long double @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} llogb (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} llogbf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long int} llogbl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions are equivalent to the corresponding @code{logb} +functions except that they return signed integer values. The +@code{ilogb} functions are from ISO C99; the @code{llogb} functions +are from TS 18661-1:2014. +@end deftypefun + +@noindent +Since integers cannot represent infinity and NaN, @code{ilogb} instead +returns an integer that can't be the exponent of a normal floating-point +number. @file{math.h} defines constants so you can check for this. + +@comment math.h +@comment ISO +@deftypevr Macro int FP_ILOGB0 +@code{ilogb} returns this value if its argument is @code{0}. The +numeric value is either @code{INT_MIN} or @code{-INT_MAX}. + +This macro is defined in @w{ISO C99}. +@end deftypevr + +@comment math.h +@comment ISO +@deftypevr Macro {long int} FP_LLOGB0 +@code{llogb} returns this value if its argument is @code{0}. The +numeric value is either @code{LONG_MIN} or @code{-LONG_MAX}. + +This macro is defined in TS 18661-1:2014. +@end deftypevr + +@comment math.h +@comment ISO +@deftypevr Macro int FP_ILOGBNAN +@code{ilogb} returns this value if its argument is @code{NaN}. The +numeric value is either @code{INT_MIN} or @code{INT_MAX}. + +This macro is defined in @w{ISO C99}. +@end deftypevr + +@comment math.h +@comment ISO +@deftypevr Macro {long int} FP_LLOGBNAN +@code{llogb} returns this value if its argument is @code{NaN}. The +numeric value is either @code{LONG_MIN} or @code{LONG_MAX}. 
+ +This macro is defined in TS 18661-1:2014. +@end deftypevr + +These values are system specific. They might even be the same. The +proper way to test the result of @code{ilogb} is as follows: + +@smallexample +i = ilogb (f); +if (i == FP_ILOGB0 || i == FP_ILOGBNAN) + @{ + if (isnan (f)) + @{ + /* @r{Handle NaN.} */ + @} + else if (f == 0.0) + @{ + /* @r{Handle 0.0.} */ + @} + else + @{ + /* @r{Some other value with large exponent,} + @r{perhaps +Inf.} */ + @} + @} +@end smallexample + +@comment math.h +@comment ISO +@deftypefun double pow (double @var{base}, double @var{power}) +@comment math.h +@comment ISO +@deftypefunx float powf (float @var{base}, float @var{power}) +@comment math.h +@comment ISO +@deftypefunx {long double} powl (long double @var{base}, long double @var{power}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These are general exponentiation functions, returning @var{base} raised +to @var{power}. + +Mathematically, @code{pow} would return a complex number when @var{base} +is negative and @var{power} is not an integral value. @code{pow} can't +do that, so instead it signals a domain error. @code{pow} may also +underflow or overflow the destination type. +@end deftypefun + +@cindex square root function +@comment math.h +@comment ISO +@deftypefun double sqrt (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float sqrtf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} sqrtl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the nonnegative square root of @var{x}. + +If @var{x} is negative, @code{sqrt} signals a domain error. +Mathematically, it should return a complex number. 
+@end deftypefun + +@cindex cube root function +@comment math.h +@comment BSD +@deftypefun double cbrt (double @var{x}) +@comment math.h +@comment BSD +@deftypefunx float cbrtf (float @var{x}) +@comment math.h +@comment BSD +@deftypefunx {long double} cbrtl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the cube root of @var{x}. They cannot +fail; every representable real value has a representable real cube root. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double hypot (double @var{x}, double @var{y}) +@comment math.h +@comment ISO +@deftypefunx float hypotf (float @var{x}, float @var{y}) +@comment math.h +@comment ISO +@deftypefunx {long double} hypotl (long double @var{x}, long double @var{y}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return @code{sqrt (@var{x}*@var{x} + +@var{y}*@var{y})}. This is the length of the hypotenuse of a right +triangle with sides of length @var{x} and @var{y}, or the distance +of the point (@var{x}, @var{y}) from the origin. Using this function +instead of the direct formula is wise, since the error is +much smaller. See also the function @code{cabs} in @ref{Absolute Value}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double expm1 (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float expm1f (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} expm1l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return a value equivalent to @code{exp (@var{x}) - 1}. +They are computed in a way that is accurate even if @var{x} is +near zero---a case where @code{exp (@var{x}) - 1} would be inaccurate owing +to subtraction of two numbers that are nearly equal. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double log1p (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float log1pf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} log1pl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return a value equivalent to @w{@code{log (1 + @var{x})}}. +They are computed in a way that is accurate even if @var{x} is +near zero. +@end deftypefun + +@cindex complex exponentiation functions +@cindex complex logarithm functions + +@w{ISO C99} defines complex variants of some of the exponentiation and +logarithm functions. + +@comment complex.h +@comment ISO +@deftypefun {complex double} cexp (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} cexpf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} cexpl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return @code{e} (the base of natural +logarithms) raised to the power of @var{z}. +Mathematically, this corresponds to the value + +@ifnottex +@math{exp (z) = exp (creal (z)) * (cos (cimag (z)) + I * sin (cimag (z)))} +@end ifnottex +@tex +$$\exp(z) = e^z = e^{{\rm Re}\,z} (\cos ({\rm Im}\,z) + i \sin ({\rm Im}\,z))$$ +@end tex +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} clog (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} clogf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} clogl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the natural logarithm of @var{z}. 
+Mathematically, this corresponds to the value + +@ifnottex +@math{log (z) = log (cabs (z)) + I * carg (z)} +@end ifnottex +@tex +$$\log(z) = \log |z| + i \arg z$$ +@end tex + +@noindent +@code{clog} has a pole at 0, and will signal overflow if @var{z} equals +or is very close to 0. It is well-defined for all other values of +@var{z}. +@end deftypefun + + +@comment complex.h +@comment GNU +@deftypefun {complex double} clog10 (complex double @var{z}) +@comment complex.h +@comment GNU +@deftypefunx {complex float} clog10f (complex float @var{z}) +@comment complex.h +@comment GNU +@deftypefunx {complex long double} clog10l (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the base 10 logarithm of the complex value +@var{z}. Mathematically, this corresponds to the value + +@ifnottex +@math{log10 (z) = log10 (cabs (z)) + I * carg (z) / log (10)} +@end ifnottex +@tex +$$\log_{10}(z) = \log_{10}|z| + i \arg z / \log (10)$$ +@end tex + +These functions are GNU extensions. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} csqrt (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} csqrtf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} csqrtl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the complex square root of the argument @var{z}. Unlike +the real-valued functions, they are defined for all values of @var{z}. 
+@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} cpow (complex double @var{base}, complex double @var{power}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} cpowf (complex float @var{base}, complex float @var{power}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} cpowl (complex long double @var{base}, complex long double @var{power}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return @var{base} raised to the power of +@var{power}. This is equivalent to +@w{@code{cexp (@var{power} * clog (@var{base}))}}. +@end deftypefun + +@node Hyperbolic Functions +@section Hyperbolic Functions +@cindex hyperbolic functions + +The functions in this section are related to the exponential functions; +see @ref{Exponents and Logarithms}. + +@comment math.h +@comment ISO +@deftypefun double sinh (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float sinhf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} sinhl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the hyperbolic sine of @var{x}, defined +mathematically as @w{@code{(exp (@var{x}) - exp (-@var{x})) / 2}}. They +may signal overflow if @var{x} is too large. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double cosh (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float coshf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} coshl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the hyperbolic cosine of @var{x}, +defined mathematically as @w{@code{(exp (@var{x}) + exp (-@var{x})) / 2}}. +They may signal overflow if @var{x} is too large. 
+@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double tanh (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float tanhf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} tanhl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the hyperbolic tangent of @var{x}, +defined mathematically as @w{@code{sinh (@var{x}) / cosh (@var{x})}}. +The result always lies between @code{-1} and @code{1}, so unlike +@code{sinh} and @code{cosh} these functions cannot overflow. +@end deftypefun + +@cindex hyperbolic functions + +There are counterparts for the hyperbolic functions which take +complex arguments. + +@comment complex.h +@comment ISO +@deftypefun {complex double} csinh (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} csinhf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} csinhl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the complex hyperbolic sine of @var{z}, defined +mathematically as @w{@code{(exp (@var{z}) - exp (-@var{z})) / 2}}. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} ccosh (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} ccoshf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} ccoshl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the complex hyperbolic cosine of @var{z}, defined +mathematically as @w{@code{(exp (@var{z}) + exp (-@var{z})) / 2}}. 
+@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} ctanh (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} ctanhf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} ctanhl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the complex hyperbolic tangent of @var{z}, +defined mathematically as @w{@code{csinh (@var{z}) / ccosh (@var{z})}}. +@end deftypefun + + +@cindex inverse hyperbolic functions + +@comment math.h +@comment ISO +@deftypefun double asinh (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float asinhf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} asinhl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the inverse hyperbolic sine of @var{x}---the +value whose hyperbolic sine is @var{x}. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double acosh (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float acoshf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} acoshl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the inverse hyperbolic cosine of @var{x}---the +value whose hyperbolic cosine is @var{x}. If @var{x} is less than +@code{1}, @code{acosh} signals a domain error. +@end deftypefun + +@comment math.h +@comment ISO +@deftypefun double atanh (double @var{x}) +@comment math.h +@comment ISO +@deftypefunx float atanhf (float @var{x}) +@comment math.h +@comment ISO +@deftypefunx {long double} atanhl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the inverse hyperbolic tangent of @var{x}---the +value whose hyperbolic tangent is @var{x}. 
If the absolute value of +@var{x} is greater than @code{1}, @code{atanh} signals a domain error; +if it is equal to 1, @code{atanh} returns infinity. +@end deftypefun + +@cindex inverse complex hyperbolic functions + +@comment complex.h +@comment ISO +@deftypefun {complex double} casinh (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} casinhf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} casinhl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the inverse complex hyperbolic sine of +@var{z}---the value whose complex hyperbolic sine is @var{z}. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} cacosh (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} cacoshf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} cacoshl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the inverse complex hyperbolic cosine of +@var{z}---the value whose complex hyperbolic cosine is @var{z}. Unlike +the real-valued functions, there are no restrictions on the value of @var{z}. +@end deftypefun + +@comment complex.h +@comment ISO +@deftypefun {complex double} catanh (complex double @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex float} catanhf (complex float @var{z}) +@comment complex.h +@comment ISO +@deftypefunx {complex long double} catanhl (complex long double @var{z}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +These functions return the inverse complex hyperbolic tangent of +@var{z}---the value whose complex hyperbolic tangent is @var{z}. Unlike +the real-valued functions, there are no restrictions on the value of +@var{z}. 
+@end deftypefun + +@node Special Functions +@section Special Functions +@cindex special functions +@cindex Bessel functions +@cindex gamma function + +These are some more exotic mathematical functions which are sometimes +useful. Currently they only have real-valued versions. + +@comment math.h +@comment SVID +@deftypefun double erf (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float erff (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} erfl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{erf} returns the error function of @var{x}. The error +function is defined as +@tex +$$\hbox{erf}(x) = {2\over\sqrt{\pi}}\cdot\int_0^x e^{-t^2} \hbox{d}t$$ +@end tex +@ifnottex +@smallexample +erf (x) = 2/sqrt(pi) * integral from 0 to x of exp(-t^2) dt +@end smallexample +@end ifnottex +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double erfc (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float erfcf (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} erfcl (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{erfc} returns @code{1.0 - erf(@var{x})}, but computed in a +fashion that avoids round-off error when @var{x} is large. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double lgamma (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float lgammaf (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} lgammal (long double @var{x}) +@safety{@prelim{}@mtunsafe{@mtasurace{:signgam}}@asunsafe{}@acsafe{}} +@code{lgamma} returns the natural logarithm of the absolute value of +the gamma function of @var{x}. 
The gamma function is defined as +@tex +$$\Gamma(x) = \int_0^\infty t^{x-1} e^{-t} \hbox{d}t$$ +@end tex +@ifnottex +@smallexample +gamma (x) = integral from 0 to @infinity{} of t^(x-1) e^-t dt +@end smallexample +@end ifnottex + +@vindex signgam +The sign of the gamma function is stored in the global variable +@var{signgam}, which is declared in @file{math.h}. It is @code{1} if +the intermediate result was positive or zero, or @code{-1} if it was +negative. + +To compute the real gamma function you can use the @code{tgamma} +function or you can compute the values as follows: +@smallexample +lgam = lgamma(x); +gam = signgam*exp(lgam); +@end smallexample + +The gamma function has singularities at the non-positive integers. +@code{lgamma} will raise the zero divide exception if evaluated at a +singularity. +@end deftypefun + +@comment math.h +@comment XPG +@deftypefun double lgamma_r (double @var{x}, int *@var{signp}) +@comment math.h +@comment XPG +@deftypefunx float lgammaf_r (float @var{x}, int *@var{signp}) +@comment math.h +@comment XPG +@deftypefunx {long double} lgammal_r (long double @var{x}, int *@var{signp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{lgamma_r} is just like @code{lgamma}, but it stores the sign of +the intermediate result in the variable pointed to by @var{signp} +instead of in the @var{signgam} global. This means it is reentrant. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double gamma (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float gammaf (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} gammal (long double @var{x}) +@safety{@prelim{}@mtunsafe{@mtasurace{:signgam}}@asunsafe{}@acsafe{}} +These functions exist for compatibility reasons. They are equivalent to +@code{lgamma} etc. It is better to use @code{lgamma} since for one the +name reflects better the actual computation, and moreover @code{lgamma} is +standardized in @w{ISO C99} while @code{gamma} is not. 
+@end deftypefun + +@comment math.h +@comment XPG, ISO +@deftypefun double tgamma (double @var{x}) +@comment math.h +@comment XPG, ISO +@deftypefunx float tgammaf (float @var{x}) +@comment math.h +@comment XPG, ISO +@deftypefunx {long double} tgammal (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{tgamma} applies the gamma function to @var{x}. The gamma +function is defined as +@tex +$$\Gamma(x) = \int_0^\infty t^{x-1} e^{-t} \hbox{d}t$$ +@end tex +@ifnottex +@smallexample +gamma (x) = integral from 0 to @infinity{} of t^(x-1) e^-t dt +@end smallexample +@end ifnottex + +This function was introduced in @w{ISO C99}. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double j0 (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float j0f (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} j0l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{j0} returns the Bessel function of the first kind of order 0 of +@var{x}. It may signal underflow if @var{x} is too large. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double j1 (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float j1f (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} j1l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{j1} returns the Bessel function of the first kind of order 1 of +@var{x}. It may signal underflow if @var{x} is too large. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double jn (int @var{n}, double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float jnf (int @var{n}, float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} jnl (int @var{n}, long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{jn} returns the Bessel function of the first kind of order +@var{n} of @var{x}. It may signal underflow if @var{x} is too large. 
+@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double y0 (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float y0f (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} y0l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{y0} returns the Bessel function of the second kind of order 0 of +@var{x}. It may signal underflow if @var{x} is too large. If @var{x} +is negative, @code{y0} signals a domain error; if it is zero, +@code{y0} signals overflow and returns @math{-@infinity}. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double y1 (double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float y1f (float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} y1l (long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{y1} returns the Bessel function of the second kind of order 1 of +@var{x}. It may signal underflow if @var{x} is too large. If @var{x} +is negative, @code{y1} signals a domain error; if it is zero, +@code{y1} signals overflow and returns @math{-@infinity}. +@end deftypefun + +@comment math.h +@comment SVID +@deftypefun double yn (int @var{n}, double @var{x}) +@comment math.h +@comment SVID +@deftypefunx float ynf (int @var{n}, float @var{x}) +@comment math.h +@comment SVID +@deftypefunx {long double} ynl (int @var{n}, long double @var{x}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{yn} returns the Bessel function of the second kind of order @var{n} of +@var{x}. It may signal underflow if @var{x} is too large. If @var{x} +is negative, @code{yn} signals a domain error; if it is zero, +@code{yn} signals overflow and returns @math{-@infinity}. +@end deftypefun + +@node Errors in Math Functions +@section Known Maximum Errors in Math Functions +@cindex math errors +@cindex ulps + +This section lists the known errors of the functions in the math +library. Errors are measured in ``units of the last place''. 
This is a +measure for the relative error. For a number @math{z} with the +representation @math{d.d@dots{}d@mul{}2^e} (we assume IEEE +floating-point numbers with base 2) the ULP is represented by + +@tex +$${|d.d\dots d - (z/2^e)|}\over {2^{p-1}}$$ +@end tex +@ifnottex +@smallexample +|d.d...d - (z / 2^e)| / 2^(p - 1) +@end smallexample +@end ifnottex + +@noindent +where @math{p} is the number of bits in the mantissa of the +floating-point number representation. Ideally the error for all +functions is always less than 0.5ulps in round-to-nearest mode. Using +rounding bits this is also +possible and normally implemented for the basic operations. Except +for certain functions such as @code{sqrt}, @code{fma} and @code{rint} +whose results are fully specified by reference to corresponding IEEE +754 floating-point operations, and conversions between strings and +floating point, @theglibc{} does not aim for correctly rounded results +for functions in the math library, and does not aim for correctness in +whether ``inexact'' exceptions are raised. Instead, the goals for +accuracy of functions without fully specified results are as follows; +some functions have bugs meaning they do not meet these goals in all +cases. In the future, @theglibc{} may provide some other correctly +rounding functions under the names such as @code{crsin} proposed for +an extension to ISO C. + +@itemize @bullet + +@item +Each function with a floating-point result behaves as if it computes +an infinite-precision result that is within a few ulp (in both real +and complex parts, for functions with complex results) of the +mathematically correct value of the function (interpreted together +with ISO C or POSIX semantics for the function in question) at the +exact value passed as the input. 
Exceptions are raised appropriately +for this value and in accordance with IEEE 754 / ISO C / POSIX +semantics, and it is then rounded according to the current rounding +direction to the result that is returned to the user. @code{errno} +may also be set (@pxref{Math Error Reporting}). (The ``inexact'' +exception may be raised, or not raised, even if this is inconsistent +with the infinite-precision value.) + +@item +For the IBM @code{long double} format, as used on PowerPC GNU/Linux, +the accuracy goal is weaker for input values not exactly representable +in 106 bits of precision; it is as if the input value is some value +within 0.5ulp of the value actually passed, where ``ulp'' is +interpreted in terms of a fixed-precision 106-bit mantissa, but not +necessarily the exact value actually passed with discontiguous +mantissa bits. + +@item +For the IBM @code{long double} format, functions whose results are +fully specified by reference to corresponding IEEE 754 floating-point +operations have the same accuracy goals as other functions, but with +the error bound being the same as that for division (3ulp). +Furthermore, ``inexact'' and ``underflow'' exceptions may be raised +for all functions for any inputs, even where such exceptions are +inconsistent with the returned value, since the underlying +floating-point arithmetic has that property. + +@item +Functions behave as if the infinite-precision result computed is zero, +infinity or NaN if and only if that is the mathematically correct +infinite-precision result. They behave as if the infinite-precision +result computed always has the same sign as the mathematically correct +result. + +@item +If the mathematical result is more than a few ulp above the overflow +threshold for the current rounding direction, the value returned is +the appropriate overflow value for the current rounding direction, +with the overflow exception raised. 
+ +@item +If the mathematical result has magnitude well below half the least +subnormal magnitude, the returned value is either zero or the least +subnormal (in each case, with the correct sign), according to the +current rounding direction and with the underflow exception raised. + +@item +Where the mathematical result underflows (before rounding) and is not +exactly representable as a floating-point value, the function does not +behave as if the computed infinite-precision result is an exact value +in the subnormal range. This means that the underflow exception is +raised other than possibly for cases where the mathematical result is +very close to the underflow threshold and the function behaves as if +it computes an infinite-precision result that does not underflow. (So +there may be spurious underflow exceptions in cases where the +underflowing result is exact, but not missing underflow exceptions in +cases where it is inexact.) + +@item +@Theglibc{} does not aim for functions to satisfy other properties of +the underlying mathematical function, such as monotonicity, where not +implied by the above goals. + +@item +All the above applies to both real and complex parts, for complex +functions. + +@end itemize + +Therefore many of the functions in the math library have errors. The +table lists the maximum error for each function which is exposed by one +of the existing tests in the test suite. The table tries to cover as much +as possible and list the actual maximum error (or at least a ballpark +figure) but this is often not achieved due to the large search space. + +The table lists the ULP values for different architectures. Different +architectures have different results since their hardware support for +floating-point operations varies and also the existing hardware support +is different. Only the round-to-nearest rounding mode is covered by +this table, and vector versions of functions are not covered. +Functions not listed do not have known errors. 
+ +@page +@c This multitable does not fit on a single page +@include libm-err.texi + +@node Pseudo-Random Numbers +@section Pseudo-Random Numbers +@cindex random numbers +@cindex pseudo-random numbers +@cindex seed (for random numbers) + +This section describes the GNU facilities for generating a series of +pseudo-random numbers. The numbers generated are not truly random; +typically, they form a sequence that repeats periodically, with a period +so large that you can ignore it for ordinary purposes. The random +number generator works by remembering a @dfn{seed} value which it uses +to compute the next random number and also to compute a new seed. + +Although the generated numbers look unpredictable within one run of a +program, the sequence of numbers is @emph{exactly the same} from one run +to the next. This is because the initial seed is always the same. This +is convenient when you are debugging a program, but it is unhelpful if +you want the program to behave unpredictably. If you want a different +pseudo-random series each time your program runs, you must specify a +different seed each time. For ordinary purposes, basing the seed on the +current time works well. For random numbers in cryptography, +@pxref{Unpredictable Bytes}. + +You can obtain repeatable sequences of numbers on a particular machine type +by specifying the same initial seed value for the random number +generator. There is no standard meaning for a particular seed value; +the same seed, used in different C libraries or on different CPU types, +will give you different random numbers. + +@Theglibc{} supports the standard @w{ISO C} random number functions +plus two other sets derived from BSD and SVID. The BSD and @w{ISO C} +functions provide identical, somewhat limited functionality. If only a +small number of random bits are required, we recommend you use the +@w{ISO C} interface, @code{rand} and @code{srand}. 
The SVID functions +provide a more flexible interface, which allows better random number +generator algorithms, provides more random bits (up to 48) per call, and +can provide random floating-point numbers. These functions are required +by the XPG standard and therefore will be present in all modern Unix +systems. + +@menu +* ISO Random:: @code{rand} and friends. +* BSD Random:: @code{random} and friends. +* SVID Random:: @code{drand48} and friends. +@end menu + +@node ISO Random +@subsection ISO C Random Number Functions + +This section describes the random number functions that are part of +the @w{ISO C} standard. + +To use these facilities, you should include the header file +@file{stdlib.h} in your program. +@pindex stdlib.h + +@comment stdlib.h +@comment ISO +@deftypevr Macro int RAND_MAX +The value of this macro is an integer constant representing the largest +value the @code{rand} function can return. In @theglibc{}, it is +@code{2147483647}, which is the largest signed integer representable in +32 bits. In other libraries, it may be as low as @code{32767}. +@end deftypevr + +@comment stdlib.h +@comment ISO +@deftypefun int rand (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c Just calls random. +The @code{rand} function returns the next pseudo-random number in the +series. The value ranges from @code{0} to @code{RAND_MAX}. +@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun void srand (unsigned int @var{seed}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c Alias to srandom. +This function establishes @var{seed} as the seed for a new series of +pseudo-random numbers. If you call @code{rand} before a seed has been +established with @code{srand}, it uses the value @code{1} as a default +seed. + +To produce a different pseudo-random series each time your program is +run, do @code{srand (time (0))}. 
+@end deftypefun + +POSIX.1 extended the C standard functions to support reproducible random +numbers in multi-threaded programs. However, the extension is badly +designed and unsuitable for serious work. + +@comment stdlib.h +@comment POSIX.1 +@deftypefun int rand_r (unsigned int *@var{seed}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns a random number in the range 0 to @code{RAND_MAX} +just as @code{rand} does. However, all its state is stored in the +@var{seed} argument. This means the RNG's state can only have as many +bits as the type @code{unsigned int} has. This is far too few to +provide a good RNG. + +If your program requires a reentrant RNG, we recommend you use the +reentrant GNU extensions to the SVID random number generator. The +POSIX.1 interface should only be used when the GNU extensions are not +available. +@end deftypefun + + +@node BSD Random +@subsection BSD Random Number Functions + +This section describes a set of random number generation functions that +are derived from BSD. There is no advantage to using these functions +with @theglibc{}; we support them for BSD compatibility only. + +The prototypes for these functions are in @file{stdlib.h}. +@pindex stdlib.h + +@comment stdlib.h +@comment BSD +@deftypefun {long int} random (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c Takes a lock and calls random_r with an automatic variable and the +@c global state, while holding a lock. +This function returns the next pseudo-random number in the sequence. +The value returned ranges from @code{0} to @code{2147483647}. + +@strong{NB:} For a time, this function was defined to return an +@code{int32_t} value to indicate that the return value always contains +32 bits even if @code{long int} is wider. The standard requires +otherwise. Users must always be aware of the 32-bit limitation, +though.
+@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun void srandom (unsigned int @var{seed}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c Takes a lock and calls srandom_r with an automatic variable and a +@c static buffer. There's no MT-safety issue because the static buffer +@c is internally protected by a lock, although other threads may modify +@c the set state before it is used. +The @code{srandom} function sets the state of the random number +generator based on the integer @var{seed}. If you supply a @var{seed} value +of @code{1}, this will cause @code{random} to reproduce the default set +of random numbers. + +To produce a different set of pseudo-random numbers each time your +program runs, do @code{srandom (time (0))}. +@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun {char *} initstate (unsigned int @var{seed}, char *@var{state}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +The @code{initstate} function is used to initialize the random number +generator state. The argument @var{state} is an array of @var{size} +bytes, used to hold the state information. It is initialized based on +@var{seed}. The size must be between 8 and 256 bytes, and should be a +power of two. The bigger the @var{state} array, the better. + +The return value is the previous value of the state information array. +You can use this value later as an argument to @code{setstate} to +restore that state. +@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun {char *} setstate (char *@var{state}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +The @code{setstate} function restores the random number state +information @var{state}. The argument must have been the result of +a previous call to @code{initstate} or @code{setstate}. + +The return value is the previous value of the state information array.
+You can use this value later as an argument to @code{setstate} to +restore that state. + +If the function fails the return value is @code{NULL}. +@end deftypefun + +The four functions described so far in this section all work on a state +which is shared by all threads. The state is not directly accessible to +the user and can only be modified by these functions. This makes it +hard to deal with situations where each thread should have its own +pseudo-random number generator. + +@Theglibc{} contains four additional functions which contain the +state as an explicit parameter and therefore make it possible to handle +thread-local PRNGs. Besides this there is no difference. In fact, the +four functions already discussed are implemented internally using the +following interfaces. + +The @file{stdlib.h} header contains a definition of the following type: + +@comment stdlib.h +@comment GNU +@deftp {Data Type} {struct random_data} + +Objects of type @code{struct random_data} contain the information +necessary to represent the state of the PRNG. Although a complete +definition of the type is present the type should be treated as opaque. +@end deftp + +The functions modifying the state follow exactly the already described +functions. + +@comment stdlib.h +@comment GNU +@deftypefun int random_r (struct random_data *restrict @var{buf}, int32_t *restrict @var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buf}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{random_r} function behaves exactly like the @code{random} +function except that it uses and modifies the state in the object +pointed to by the first parameter instead of the global state. 
+@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int srandom_r (unsigned int @var{seed}, struct random_data *@var{buf}) +@safety{@prelim{}@mtsafe{@mtsrace{:buf}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{srandom_r} function behaves exactly like the @code{srandom} +function except that it uses and modifies the state in the object +pointed to by the second parameter instead of the global state. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int initstate_r (unsigned int @var{seed}, char *restrict @var{statebuf}, size_t @var{statelen}, struct random_data *restrict @var{buf}) +@safety{@prelim{}@mtsafe{@mtsrace{:buf}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{initstate_r} function behaves exactly like the @code{initstate} +function except that it uses and modifies the state in the object +pointed to by the fourth parameter instead of the global state. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int setstate_r (char *restrict @var{statebuf}, struct random_data *restrict @var{buf}) +@safety{@prelim{}@mtsafe{@mtsrace{:buf}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{setstate_r} function behaves exactly like the @code{setstate} +function except that it uses and modifies the state in the object +pointed to by the second parameter instead of the global state. +@end deftypefun + +@node SVID Random +@subsection SVID Random Number Function + +The C library on SVID systems contains yet another kind of random number +generator functions. They use a state of 48 bits of data. The user can +choose among a collection of functions which return the random bits +in different forms. + +Generally there are two kinds of function. The first uses a state of +the random number generator which is shared among several functions and +by all threads of the process. The second requires the user to handle +the state. + +All functions have in common that they use the same congruential +formula with the same constants.
The formula is + +@smallexample +Y = (a * X + c) mod m +@end smallexample + +@noindent +where @var{X} is the state of the generator at the beginning and +@var{Y} the state at the end. @code{a} and @code{c} are constants +determining the way the generator works. By default they are + +@smallexample +a = 0x5DEECE66D = 25214903917 +c = 0xb = 11 +@end smallexample + +@noindent +but they can also be changed by the user. @code{m} is of course 2^48 +since the state consists of a 48-bit array. + +The prototypes for these functions are in @file{stdlib.h}. +@pindex stdlib.h + + +@comment stdlib.h +@comment SVID +@deftypefun double drand48 (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +@c Uses of the static state buffer are not guarded by a lock (thus +@c @mtasurace:drand48), so they may be found or left at a +@c partially-updated state in case of calls from within signal handlers +@c or cancellation. None of this will break safety rules or invoke +@c undefined behavior, but it may affect randomness. +This function returns a @code{double} value in the range of @code{0.0} +to @code{1.0} (exclusive). The random bits are determined by the global +state of the random number generator in the C library. + +Since the @code{double} type according to @w{IEEE 754} has a 52-bit +mantissa this means 4 bits are not initialized by the random number +generator. These are (of course) chosen to be the least significant +bits and they are initialized to @code{0}. +@end deftypefun + +@comment stdlib.h +@comment SVID +@deftypefun double erand48 (unsigned short int @var{xsubi}[3]) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +@c The static buffer is just initialized with default parameters, which +@c are later read to advance the state held in xsubi. +This function returns a @code{double} value in the range of @code{0.0} +to @code{1.0} (exclusive), similarly to @code{drand48}. 
The argument is +an array describing the state of the random number generator. + +This function can be called subsequently since it updates the array to +guarantee random numbers. The array should have been initialized before +initial use to obtain reproducible results. +@end deftypefun + +@comment stdlib.h +@comment SVID +@deftypefun {long int} lrand48 (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +The @code{lrand48} function returns an integer value in the range of +@code{0} to @code{2^31} (exclusive). Even if the size of the @code{long +int} type can take more than 32 bits, no higher numbers are returned. +The random bits are determined by the global state of the random number +generator in the C library. +@end deftypefun + +@comment stdlib.h +@comment SVID +@deftypefun {long int} nrand48 (unsigned short int @var{xsubi}[3]) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +This function is similar to the @code{lrand48} function in that it +returns a number in the range of @code{0} to @code{2^31} (exclusive) but +the state of the random number generator used to produce the random bits +is determined by the array provided as the parameter to the function. + +The numbers in the array are updated afterwards so that subsequent calls +to this function yield different results (as is expected of a random +number generator). The array should have been initialized before the +first call to obtain reproducible results. +@end deftypefun + +@comment stdlib.h +@comment SVID +@deftypefun {long int} mrand48 (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +The @code{mrand48} function is similar to @code{lrand48}. The only +difference is that the numbers returned are in the range @code{-2^31} to +@code{2^31} (exclusive). 
+@end deftypefun + +@comment stdlib.h +@comment SVID +@deftypefun {long int} jrand48 (unsigned short int @var{xsubi}[3]) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +The @code{jrand48} function is similar to @code{nrand48}. The only +difference is that the numbers returned are in the range @code{-2^31} to +@code{2^31} (exclusive). For the @code{xsubi} parameter the same +requirements are necessary. +@end deftypefun + +The internal state of the random number generator can be initialized in +several ways. The methods differ in the completeness of the +information provided. + +@comment stdlib.h +@comment SVID +@deftypefun void srand48 (long int @var{seedval}) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +The @code{srand48} function sets the most significant 32 bits of the +internal state of the random number generator to the least +significant 32 bits of the @var{seedval} parameter. The lower 16 bits +are initialized to the value @code{0x330E}. Even if the @code{long +int} type contains more than 32 bits only the lower 32 bits are used. + +Owing to this limitation, initialization of the state of this +function is not very useful. But it makes it easy to use a construct +like @code{srand48 (time (0))}. + +A side-effect of this function is that the values @code{a} and @code{c} +from the internal state, which are used in the congruential formula, +are reset to the default values given above. This is of importance once +the user has called the @code{lcong48} function (see below). +@end deftypefun + +@comment stdlib.h +@comment SVID +@deftypefun {unsigned short int *} seed48 (unsigned short int @var{seed16v}[3]) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +The @code{seed48} function initializes all 48 bits of the state of the +internal random number generator from the contents of the parameter +@var{seed16v}. 
Here the lower 16 bits of the first element of +@var{seed16v} initialize the least significant 16 bits of the internal +state, the lower 16 bits of @code{@var{seed16v}[1]} initialize the mid-order +16 bits of the state and the 16 lower bits of @code{@var{seed16v}[2]} +initialize the most significant 16 bits of the state. + +Unlike @code{srand48} this function lets the user initialize all 48 bits +of the state. + +The value returned by @code{seed48} is a pointer to an array containing +the values of the internal state before the change. This might be +useful to restart the random number generator at a certain state. +Otherwise the value can simply be ignored. + +As for @code{srand48}, the values @code{a} and @code{c} from the +congruential formula are reset to the default values. +@end deftypefun + +There is one more function to initialize the random number generator +which enables you to specify even more information by allowing you to +change the parameters in the congruential formula. + +@comment stdlib.h +@comment SVID +@deftypefun void lcong48 (unsigned short int @var{param}[7]) +@safety{@prelim{}@mtunsafe{@mtasurace{:drand48}}@asunsafe{}@acunsafe{@acucorrupt{}}} +The @code{lcong48} function allows the user to change the complete state +of the random number generator. Unlike @code{srand48} and +@code{seed48}, this function also changes the constants in the +congruential formula. + +From the seven elements in the array @var{param} the least significant +16 bits of the entries @code{@var{param}[0]} to @code{@var{param}[2]} +determine the initial state, the least significant 16 bits of +@code{@var{param}[3]} to @code{@var{param}[5]} determine the 48 bit +constant @code{a} and @code{@var{param}[6]} determines the 16-bit value +@code{c}. +@end deftypefun + +All the above functions have in common that they use the global +parameters for the congruential formula. 
In multi-threaded programs it +might sometimes be useful to have different parameters in different +threads. For this reason all the above functions have a counterpart +which works on a description of the random number generator in the +user-supplied buffer instead of the global state. + +Please note that it is no problem if several threads use the global +state if all threads use the functions which take a pointer to an array +containing the state. The random numbers are computed following the +same loop but if the state in the array is different all threads will +obtain an individual random number generator. + +The user-supplied buffer must be of type @code{struct drand48_data}. +This type should be regarded as opaque and not manipulated directly. + +@comment stdlib.h +@comment GNU +@deftypefun int drand48_r (struct drand48_data *@var{buffer}, double *@var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +This function is equivalent to the @code{drand48} function with the +difference that it does not modify the global random number generator +parameters but instead the parameters in the buffer supplied through the +pointer @var{buffer}. The random number is returned in the variable +pointed to by @var{result}. + +The return value of the function indicates whether the call succeeded. +If the value is less than @code{0} an error occurred and @var{errno} is +set to indicate the problem. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int erand48_r (unsigned short int @var{xsubi}[3], struct drand48_data *@var{buffer}, double *@var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{erand48_r} function works like @code{erand48}, but in addition +it takes an argument @var{buffer} which describes the random number +generator. 
The state of the random number generator is taken from the +@code{xsubi} array, the parameters for the congruential formula from the +buffer pointed to by @var{buffer}. The random number is returned in +the variable pointed to by @var{result}. + +The return value is non-negative if the call succeeded. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int lrand48_r (struct drand48_data *@var{buffer}, long int *@var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +This function is similar to @code{lrand48}, but in addition it takes a +pointer to a buffer describing the state of the random number generator +just like @code{drand48_r}. + +If the return value of the function is non-negative the variable pointed +to by @var{result} contains the result. Otherwise an error occurred. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int nrand48_r (unsigned short int @var{xsubi}[3], struct drand48_data *@var{buffer}, long int *@var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{nrand48_r} function works like @code{nrand48} in that it +produces a random number in the range @code{0} to @code{2^31}. But instead +of using the global parameters for the congruential formula it uses the +information from the buffer pointed to by @var{buffer}. The state is +described by the values in @var{xsubi}. + +If the return value is non-negative the variable pointed to by +@var{result} contains the result. + +This function is a GNU extension and should not be used in portable +programs. 
+@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int mrand48_r (struct drand48_data *@var{buffer}, long int *@var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +This function is similar to @code{mrand48} but like the other reentrant +functions it uses the random number generator described by the value in +the buffer pointed to by @var{buffer}. + +If the return value is non-negative the variable pointed to by +@var{result} contains the result. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int jrand48_r (unsigned short int @var{xsubi}[3], struct drand48_data *@var{buffer}, long int *@var{result}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{jrand48_r} function is similar to @code{jrand48}. Like the +other reentrant functions of this function family it uses the +congruential formula parameters from the buffer pointed to by +@var{buffer}. + +If the return value is non-negative the variable pointed to by +@var{result} contains the result. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +Before any of the above functions are used the buffer of type +@code{struct drand48_data} should be initialized. The easiest way to do +this is to fill the whole buffer with null bytes, e.g. by + +@smallexample +memset (buffer, '\0', sizeof (struct drand48_data)); +@end smallexample + +@noindent +Using any of the reentrant functions of this family now will +automatically initialize the random number generator to the default +values for the state and the parameters of the congruential formula. + +The other possibility is to use any of the functions which explicitly +initialize the buffer. 
Though it might be obvious how to initialize the +buffer from looking at the parameter to the function, it is highly +recommended to use these functions since the result might not always be +what you expect. + +@comment stdlib.h +@comment GNU +@deftypefun int srand48_r (long int @var{seedval}, struct drand48_data *@var{buffer}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +The description of the random number generator represented by the +information in @var{buffer} is initialized similarly to what the function +@code{srand48} does. The state is initialized from the parameter +@var{seedval} and the parameters for the congruential formula are +initialized to their default values. + +If the return value is non-negative the function call succeeded. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int seed48_r (unsigned short int @var{seed16v}[3], struct drand48_data *@var{buffer}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +This function is similar to @code{srand48_r} but like @code{seed48} it +initializes all 48 bits of the state from the parameter @var{seed16v}. + +If the return value is non-negative the function call succeeded. It +does not return a pointer to the previous state of the random number +generator like the @code{seed48} function does. If the user wants to +preserve the state for a later re-run s/he can copy the whole buffer +pointed to by @var{buffer}. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int lcong48_r (unsigned short int @var{param}[7], struct drand48_data *@var{buffer}) +@safety{@prelim{}@mtsafe{@mtsrace{:buffer}}@assafe{}@acunsafe{@acucorrupt{}}} +This function initializes all aspects of the random number generator +described in @var{buffer} with the data in @var{param}. 
Here it is +especially true that the function does more than just copying the +contents of @var{param} into @var{buffer}. More work is required and +therefore it is important to use this function rather than initializing +the random number generator directly. + +If the return value is non-negative the function call succeeded. + +This function is a GNU extension and should not be used in portable +programs. +@end deftypefun + +@node FP Function Optimizations +@section Is Fast Code or Small Code preferred? +@cindex Optimization + +If an application uses many floating point functions it is often the case +that the cost of the function calls themselves is not negligible. +Modern processors can often execute the operations themselves +very fast, but the function call disrupts the instruction pipeline. + +For this reason @theglibc{} provides optimizations for many of the +frequently-used math functions. When GNU CC is used and the user +activates the optimizer, several new inline functions and macros are +defined. These new functions and macros have the same names as the +library functions and so are used instead of the latter. In the case of +inline functions the compiler will decide whether it is reasonable to +use them, and this decision is usually correct. + +This means that no calls to the library functions may be necessary, and +this can increase the speed of generated code significantly. The drawback is +that code size will increase, and the increase is not always negligible. + +There are two kinds of inline functions: those that give the same result +as the library functions and others that might not set @code{errno} and +might have a reduced precision and/or argument range in comparison with +the library functions. The latter inline functions are only available +if the flag @code{-ffast-math} is given to GNU CC. + +In cases where the inline functions and macros are not wanted the symbol +@code{__NO_MATH_INLINES} should be defined before any system header is +included. 
This will ensure that only library functions are used. Of +course, it can be determined for each file in the project whether +giving this option is preferable or not. + +Not all hardware implements the entire @w{IEEE 754} standard, and even +if it does there may be a substantial performance penalty for using some +of its features. For example, enabling traps on some processors forces +the FPU to run un-pipelined, which can more than double calculation time. +@c ***Add explanation of -lieee, -mieee. diff --git a/REORG.TODO/manual/memory.texi b/REORG.TODO/manual/memory.texi new file mode 100644 index 0000000000..fb6b594ef1 --- /dev/null +++ b/REORG.TODO/manual/memory.texi @@ -0,0 +1,3504 @@ +@node Memory, Character Handling, Error Reporting, Top +@chapter Virtual Memory Allocation And Paging +@c %MENU% Allocating virtual memory and controlling paging +@cindex memory allocation +@cindex storage allocation + +This chapter describes how processes manage and use memory in a system +that uses @theglibc{}. + +@Theglibc{} has several functions for dynamically allocating +virtual memory in various ways. They vary in generality and in +efficiency. The library also provides functions for controlling paging +and allocation of real memory. + + +@menu +* Memory Concepts:: An introduction to concepts and terminology. +* Memory Allocation:: Allocating storage for your program data +* Resizing the Data Segment:: @code{brk}, @code{sbrk} +* Locking Pages:: Preventing page faults +@end menu + +Memory mapped I/O is not discussed in this chapter. @xref{Memory-mapped I/O}. + + + +@node Memory Concepts +@section Process Memory Concepts + +One of the most basic resources a process has available to it is memory. +There are a lot of different ways systems organize memory, but in a +typical one, each process has one linear virtual address space, with +addresses running from zero to some huge maximum. 
It need not be +contiguous; i.e., not all of these addresses actually can be used to +store data. + +The virtual memory is divided into pages (4 kilobytes is typical). +Backing each page of virtual memory is a page of real memory (called a +@dfn{frame}) or some secondary storage, usually disk space. The disk +space might be swap space or just some ordinary disk file. Actually, a +page of all zeroes sometimes has nothing at all backing it -- there's +just a flag saying it is all zeroes. +@cindex page frame +@cindex frame, real memory +@cindex swap space +@cindex page, virtual memory + +The same frame of real memory or backing store can back multiple virtual +pages belonging to multiple processes. This is normally the case, for +example, with virtual memory occupied by @glibcadj{} code. The same +real memory frame containing the @code{printf} function backs a virtual +memory page in each of the existing processes that has a @code{printf} +call in its program. + +In order for a program to access any part of a virtual page, the page +must at that moment be backed by (``connected to'') a real frame. But +because there is usually a lot more virtual memory than real memory, the +pages must move back and forth between real memory and backing store +regularly, coming into real memory when a process needs to access them +and then retreating to backing store when not needed anymore. This +movement is called @dfn{paging}. + +When a program attempts to access a page which is not at that moment +backed by real memory, this is known as a @dfn{page fault}. When a page +fault occurs, the kernel suspends the process, places the page into a +real page frame (this is called ``paging in'' or ``faulting in''), then +resumes the process so that from the process' point of view, the page +was in real memory all along. In fact, to the process, all pages always +seem to be in real memory. 
Except for one thing: the elapsed execution +time of an instruction that would normally be a few nanoseconds is +suddenly much, much, longer (because the kernel normally has to do I/O +to complete the page-in). For programs sensitive to that, the functions +described in @ref{Locking Pages} can control it. +@cindex page fault +@cindex paging + +Within each virtual address space, a process has to keep track of what +is at which addresses, and that process is called memory allocation. +Allocation usually brings to mind meting out scarce resources, but in +the case of virtual memory, that's not a major goal, because there is +generally much more of it than anyone needs. Memory allocation within a +process is mainly just a matter of making sure that the same byte of +memory isn't used to store two different things. + +Processes allocate memory in two major ways: by exec and +programmatically. Actually, forking is a third way, but it's not very +interesting. @xref{Creating a Process}. + +Exec is the operation of creating a virtual address space for a process, +loading its basic program into it, and executing the program. It is +done by the ``exec'' family of functions (e.g. @code{execl}). The +operation takes a program file (an executable), it allocates space to +load all the data in the executable, loads it, and transfers control to +it. That data is most notably the instructions of the program (the +@dfn{text}), but also literals and constants in the program and even +some variables: C variables with the static storage class (@pxref{Memory +Allocation and C}). +@cindex executable +@cindex literals +@cindex constants + +Once that program begins to execute, it uses programmatic allocation to +gain additional memory. In a C program with @theglibc{}, there +are two kinds of programmatic allocation: automatic and dynamic. +@xref{Memory Allocation and C}. + +Memory-mapped I/O is another form of dynamic virtual memory allocation. 
+Mapping memory to a file means declaring that the contents of a certain +range of a process' addresses shall be identical to the contents of a +specified regular file. The system makes the virtual memory initially +contain the contents of the file, and if you modify the memory, the +system writes the same modification to the file. Note that due to the +magic of virtual memory and page faults, there is no reason for the +system to do I/O to read the file, or allocate real memory for its +contents, until the program accesses the virtual memory. +@xref{Memory-mapped I/O}. +@cindex memory mapped I/O +@cindex memory mapped file +@cindex files, accessing + +Just as it programmatically allocates memory, the program can +programmatically deallocate (@dfn{free}) it. You can't free the memory +that was allocated by exec. When the program exits or execs, you might +say that all its memory gets freed, but since in both cases the address +space ceases to exist, the point is really moot. @xref{Program +Termination}. +@cindex execing a program +@cindex freeing memory +@cindex exiting a program + +A process' virtual address space is divided into segments. A segment is +a contiguous range of virtual addresses. Three important segments are: + +@itemize @bullet + +@item + +The @dfn{text segment} contains a program's instructions and literals and +static constants. It is allocated by exec and stays the same size for +the life of the virtual address space. + +@item +The @dfn{data segment} is working storage for the program. It can be +preallocated and preloaded by exec and the process can extend or shrink +it by calling functions as described in @ref{Resizing the Data +Segment}. Its lower end is fixed. + +@item +The @dfn{stack segment} contains a program stack. It grows as the stack +grows, but doesn't shrink when the stack shrinks. 
+ +@end itemize + + + +@node Memory Allocation +@section Allocating Storage For Program Data + +This section covers how ordinary programs manage storage for their data, +including the famous @code{malloc} function and some fancier facilities +special to @theglibc{} and GNU Compiler. + +@menu +* Memory Allocation and C:: How to get different kinds of allocation in C. +* The GNU Allocator:: An overview of the GNU @code{malloc} + implementation. +* Unconstrained Allocation:: The @code{malloc} facility allows fully general + dynamic allocation. +* Allocation Debugging:: Finding memory leaks and not freed memory. +* Replacing malloc:: Using your own @code{malloc}-style allocator. +* Obstacks:: Obstacks are less general than malloc + but more efficient and convenient. +* Variable Size Automatic:: Allocation of variable-sized blocks + of automatic storage that are freed when the + calling function returns. +@end menu + + +@node Memory Allocation and C +@subsection Memory Allocation in C Programs + +The C language supports two kinds of memory allocation through the +variables in C programs: + +@itemize @bullet +@item +@dfn{Static allocation} is what happens when you declare a static or +global variable. Each static or global variable defines one block of +space, of a fixed size. The space is allocated once, when your program +is started (part of the exec operation), and is never freed. +@cindex static memory allocation +@cindex static storage class + +@item +@dfn{Automatic allocation} happens when you declare an automatic +variable, such as a function argument or a local variable. The space +for an automatic variable is allocated when the compound statement +containing the declaration is entered, and is freed when that +compound statement is exited. +@cindex automatic memory allocation +@cindex automatic storage class + +In GNU C, the size of the automatic storage can be an expression +that varies. In other C implementations, it must be a constant. 
+@end itemize + +A third important kind of memory allocation, @dfn{dynamic allocation}, +is not supported by C variables but is available via @glibcadj{} +functions. +@cindex dynamic memory allocation + +@subsubsection Dynamic Memory Allocation +@cindex dynamic memory allocation + +@dfn{Dynamic memory allocation} is a technique in which programs +determine as they are running where to store some information. You need +dynamic allocation when the amount of memory you need, or how long you +continue to need it, depends on factors that are not known before the +program runs. + +For example, you may need a block to store a line read from an input +file; since there is no limit to how long a line can be, you must +allocate the memory dynamically and make it dynamically larger as you +read more of the line. + +Or, you may need a block for each record or each definition in the input +data; since you can't know in advance how many there will be, you must +allocate a new block for each record or definition as you read it. + +When you use dynamic allocation, the allocation of a block of memory is +an action that the program requests explicitly. You call a function or +macro when you want to allocate space, and specify the size with an +argument. If you want to free the space, you do so by calling another +function or macro. You can do these things whenever you want, as often +as you want. + +Dynamic allocation is not supported by C variables; there is no storage +class ``dynamic'', and there can never be a C variable whose value is +stored in dynamically allocated space. The only way to get dynamically +allocated memory is via a system call (which is generally via a @glibcadj{} +function call), and the only way to refer to dynamically +allocated space is through a pointer. 
Because it is less convenient, +and because the actual process of dynamic allocation requires more +computation time, programmers generally use dynamic allocation only when +neither static nor automatic allocation will serve. + +For example, if you want to allocate dynamically some space to hold a +@code{struct foobar}, you cannot declare a variable of type @code{struct +foobar} whose contents are the dynamically allocated space. But you can +declare a variable of pointer type @code{struct foobar *} and assign it the +address of the space. Then you can use the operators @samp{*} and +@samp{->} on this pointer variable to refer to the contents of the space: + +@smallexample +@{ + struct foobar *ptr + = (struct foobar *) malloc (sizeof (struct foobar)); + ptr->name = x; + ptr->next = current_foobar; + current_foobar = ptr; +@} +@end smallexample + +@node The GNU Allocator +@subsection The GNU Allocator +@cindex gnu allocator + +The @code{malloc} implementation in @theglibc{} is derived from ptmalloc +(pthreads malloc), which in turn is derived from dlmalloc (Doug Lea malloc). +This malloc may allocate memory in two different ways depending on their size +and certain parameters that may be controlled by users. The most common way is +to allocate portions of memory (called chunks) from a large contiguous area of +memory and manage these areas to optimize their use and reduce wastage in the +form of unusable chunks. Traditionally the system heap was set up to be the one +large memory area but the @glibcadj{} @code{malloc} implementation maintains +multiple such areas to optimize their use in multi-threaded applications. Each +such area is internally referred to as an @dfn{arena}. + +As opposed to other versions, the @code{malloc} in @theglibc{} does not round +up chunk sizes to powers of two, neither for large nor for small sizes. +Neighboring chunks can be coalesced on a @code{free} no matter what their size +is. 
This makes the implementation suitable for all kinds of allocation +patterns without generally incurring high memory waste through fragmentation. +The presence of multiple arenas allows multiple threads to allocate +memory simultaneously in separate arenas, thus improving performance. + +The other way of memory allocation is for very large blocks, i.e. much larger +than a page. These requests are allocated with @code{mmap} (anonymous or via +@file{/dev/zero}; @pxref{Memory-mapped I/O}). This has the great advantage +that these chunks are returned to the system immediately when they are freed. +Therefore, it cannot happen that a large chunk becomes ``locked'' in between +smaller ones and even after calling @code{free} wastes memory. The size +threshold for @code{mmap} to be used is dynamic and gets adjusted according to +allocation patterns of the program. @code{mallopt} can be used to statically +adjust the threshold using @code{M_MMAP_THRESHOLD} and the use of @code{mmap} +can be disabled completely with @code{M_MMAP_MAX}; +@pxref{Malloc Tunable Parameters}. + +A more detailed technical description of the GNU Allocator is maintained in +the @glibcadj{} wiki. See +@uref{https://sourceware.org/glibc/wiki/MallocInternals}. + +It is possible to use your own custom @code{malloc} instead of the +built-in allocator provided by @theglibc{}. @xref{Replacing malloc}. + +@node Unconstrained Allocation +@subsection Unconstrained Allocation +@cindex unconstrained memory allocation +@cindex @code{malloc} function +@cindex heap, dynamic allocation from + +The most general dynamic allocation facility is @code{malloc}. It +allows you to allocate blocks of memory of any size at any time, make +them bigger or smaller at any time, and free the blocks individually at +any time (or never). + +@menu +* Basic Allocation:: Simple use of @code{malloc}. +* Malloc Examples:: Examples of @code{malloc}. @code{xmalloc}. 
+* Freeing after Malloc:: Use @code{free} to free a block you + got with @code{malloc}. +* Changing Block Size:: Use @code{realloc} to make a block + bigger or smaller. +* Allocating Cleared Space:: Use @code{calloc} to allocate a + block and clear it. +* Aligned Memory Blocks:: Allocating specially aligned memory. +* Malloc Tunable Parameters:: Use @code{mallopt} to adjust allocation + parameters. +* Heap Consistency Checking:: Automatic checking for errors. +* Hooks for Malloc:: You can use these hooks for debugging + programs that use @code{malloc}. +* Statistics of Malloc:: Getting information about how much + memory your program is using. +* Summary of Malloc:: Summary of @code{malloc} and related functions. +@end menu + +@node Basic Allocation +@subsubsection Basic Memory Allocation +@cindex allocation of memory with @code{malloc} + +To allocate a block of memory, call @code{malloc}. The prototype for +this function is in @file{stdlib.h}. +@pindex stdlib.h + +@comment malloc.h stdlib.h +@comment ISO +@deftypefun {void *} malloc (size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c Malloc hooks and __morecore pointers, as well as such parameters as +@c max_n_mmaps and max_mmapped_mem, are accessed without guards, so they +@c could pose a thread safety issue; in order to not declare malloc +@c MT-unsafe, it's modifying the hooks and parameters while multiple +@c threads are active that is regarded as unsafe. An arena's next field +@c is initialized and never changed again, except for main_arena's, +@c that's protected by list_lock; next_free is only modified while +@c list_lock is held too. All other data members of an arena, as well +@c as the metadata of the memory areas assigned to it, are only modified +@c while holding the arena's mutex (fastbin pointers use catomic ops +@c because they may be modified by free without taking the arena's +@c lock). 
Some reassurance was needed for fastbins, for it wasn't clear +@c how they were initialized. It turns out they are always +@c zero-initialized: main_arena's, for being static data, and other +@c arena's, for being just-mmapped memory. + +@c Leaking file descriptors and memory in case of cancellation is +@c unavoidable without disabling cancellation, but the lock situation is +@c a bit more complicated: we don't have fallback arenas for malloc to +@c be safe to call from within signal handlers. Error-checking mutexes +@c or trylock could enable us to try and use alternate arenas, even with +@c -DPER_THREAD (enabled by default), but supporting interruption +@c (cancellation or signal handling) while holding the arena list mutex +@c would require more work; maybe blocking signals and disabling async +@c cancellation while manipulating the arena lists? + +@c __libc_malloc @asulock @aculock @acsfd @acsmem +@c force_reg ok +@c *malloc_hook unguarded +@c arena_lock @asulock @aculock @acsfd @acsmem +@c mutex_lock @asulock @aculock +@c arena_get2 @asulock @aculock @acsfd @acsmem +@c get_free_list @asulock @aculock +@c mutex_lock (list_lock) dup @asulock @aculock +@c mutex_unlock (list_lock) dup @aculock +@c mutex_lock (arena lock) dup @asulock @aculock [returns locked] +@c __get_nprocs ext ok @acsfd +@c NARENAS_FROM_NCORES ok +@c catomic_compare_and_exchange_bool_acq ok +@c _int_new_arena ok @asulock @aculock @acsmem +@c new_heap ok @acsmem +@c mmap ok @acsmem +@c munmap ok @acsmem +@c mprotect ok +@c chunk2mem ok +@c set_head ok +@c tsd_setspecific dup ok +@c mutex_init ok +@c mutex_lock (just-created mutex) ok, returns locked +@c mutex_lock (list_lock) dup @asulock @aculock +@c atomic_write_barrier ok +@c mutex_unlock (list_lock) @aculock +@c catomic_decrement ok +@c reused_arena @asulock @aculock +@c reads&writes next_to_use and iterates over arena next without guards +@c those are harmless as long as we don't drop arenas from the +@c NEXT list, and we never do; when a 
thread terminates, +@c arena_thread_freeres prepends the arena to the free_list +@c NEXT_FREE list, but NEXT is never modified, so it's safe! +@c mutex_trylock (arena lock) @asulock @aculock +@c mutex_lock (arena lock) dup @asulock @aculock +@c tsd_setspecific dup ok +@c _int_malloc @acsfd @acsmem +@c checked_request2size ok +@c REQUEST_OUT_OF_RANGE ok +@c request2size ok +@c get_max_fast ok +@c fastbin_index ok +@c fastbin ok +@c catomic_compare_and_exhange_val_acq ok +@c malloc_printerr dup @mtsenv +@c if we get to it, we're toast already, undefined behavior must have +@c been invoked before +@c libc_message @mtsenv [no leaks with cancellation disabled] +@c FATAL_PREPARE ok +@c pthread_setcancelstate disable ok +@c libc_secure_getenv @mtsenv +@c getenv @mtsenv +@c open_not_cancel_2 dup @acsfd +@c strchrnul ok +@c WRITEV_FOR_FATAL ok +@c writev ok +@c mmap ok @acsmem +@c munmap ok @acsmem +@c BEFORE_ABORT @acsfd +@c backtrace ok +@c write_not_cancel dup ok +@c backtrace_symbols_fd @aculock +@c open_not_cancel_2 dup @acsfd +@c read_not_cancel dup ok +@c close_not_cancel_no_status dup @acsfd +@c abort ok +@c itoa_word ok +@c abort ok +@c check_remalloced_chunk ok/disabled +@c chunk2mem dup ok +@c alloc_perturb ok +@c in_smallbin_range ok +@c smallbin_index ok +@c bin_at ok +@c last ok +@c malloc_consolidate ok +@c get_max_fast dup ok +@c clear_fastchunks ok +@c unsorted_chunks dup ok +@c fastbin dup ok +@c atomic_exchange_acq ok +@c check_inuse_chunk dup ok/disabled +@c chunk_at_offset dup ok +@c chunksize dup ok +@c inuse_bit_at_offset dup ok +@c unlink dup ok +@c clear_inuse_bit_at_offset dup ok +@c in_smallbin_range dup ok +@c set_head dup ok +@c malloc_init_state ok +@c bin_at dup ok +@c set_noncontiguous dup ok +@c set_max_fast dup ok +@c initial_top ok +@c unsorted_chunks dup ok +@c check_malloc_state ok/disabled +@c set_inuse_bit_at_offset ok +@c check_malloced_chunk ok/disabled +@c largebin_index ok +@c have_fastchunks ok +@c unsorted_chunks ok +@c bin_at ok 
+@c chunksize ok +@c chunk_at_offset ok +@c set_head ok +@c set_foot ok +@c mark_bin ok +@c idx2bit ok +@c first ok +@c unlink ok +@c malloc_printerr dup ok +@c in_smallbin_range dup ok +@c idx2block ok +@c idx2bit dup ok +@c next_bin ok +@c sysmalloc @acsfd @acsmem +@c MMAP @acsmem +@c set_head dup ok +@c check_chunk ok/disabled +@c chunk2mem dup ok +@c chunksize dup ok +@c chunk_at_offset dup ok +@c heap_for_ptr ok +@c grow_heap ok +@c mprotect ok +@c set_head dup ok +@c new_heap @acsmem +@c MMAP dup @acsmem +@c munmap @acsmem +@c top ok +@c set_foot dup ok +@c contiguous ok +@c MORECORE ok +@c *__morecore ok unguarded +@c __default_morecore +@c sbrk ok +@c force_reg dup ok +@c *__after_morecore_hook unguarded +@c set_noncontiguous ok +@c malloc_printerr dup ok +@c _int_free (have_lock) @acsfd @acsmem [@asulock @aculock] +@c chunksize dup ok +@c mutex_unlock dup @aculock/!have_lock +@c malloc_printerr dup ok +@c check_inuse_chunk ok/disabled +@c chunk_at_offset dup ok +@c mutex_lock dup @asulock @aculock/@have_lock +@c chunk2mem dup ok +@c free_perturb ok +@c set_fastchunks ok +@c catomic_and ok +@c fastbin_index dup ok +@c fastbin dup ok +@c catomic_compare_and_exchange_val_rel ok +@c chunk_is_mmapped ok +@c contiguous dup ok +@c prev_inuse ok +@c unlink dup ok +@c inuse_bit_at_offset dup ok +@c clear_inuse_bit_at_offset ok +@c unsorted_chunks dup ok +@c in_smallbin_range dup ok +@c set_head dup ok +@c set_foot dup ok +@c check_free_chunk ok/disabled +@c check_chunk dup ok/disabled +@c have_fastchunks dup ok +@c malloc_consolidate dup ok +@c systrim ok +@c MORECORE dup ok +@c *__after_morecore_hook dup unguarded +@c set_head dup ok +@c check_malloc_state ok/disabled +@c top dup ok +@c heap_for_ptr dup ok +@c heap_trim @acsfd @acsmem +@c top dup ok +@c chunk_at_offset dup ok +@c prev_chunk ok +@c chunksize dup ok +@c prev_inuse dup ok +@c delete_heap @acsmem +@c munmap dup @acsmem +@c unlink dup ok +@c set_head dup ok +@c shrink_heap @acsfd +@c 
check_may_shrink_heap @acsfd +@c open_not_cancel_2 @acsfd +@c read_not_cancel ok +@c close_not_cancel_no_status @acsfd +@c MMAP dup ok +@c madvise ok +@c munmap_chunk @acsmem +@c chunksize dup ok +@c chunk_is_mmapped dup ok +@c chunk2mem dup ok +@c malloc_printerr dup ok +@c munmap dup @acsmem +@c check_malloc_state ok/disabled +@c arena_get_retry @asulock @aculock @acsfd @acsmem +@c mutex_unlock dup @aculock +@c mutex_lock dup @asulock @aculock +@c arena_get2 dup @asulock @aculock @acsfd @acsmem +@c mutex_unlock @aculock +@c mem2chunk ok +@c chunk_is_mmapped ok +@c arena_for_chunk ok +@c chunk_non_main_arena ok +@c heap_for_ptr ok +This function returns a pointer to a newly allocated block @var{size} +bytes long, or a null pointer if the block could not be allocated. +@end deftypefun + +The contents of the block are undefined; you must initialize it yourself +(or use @code{calloc} instead; @pxref{Allocating Cleared Space}). +Normally you would cast the value as a pointer to the kind of object +that you want to store in the block. Here we show an example of doing +so, and of initializing the space with zeros using the library function +@code{memset} (@pxref{Copying Strings and Arrays}): + +@smallexample +struct foo *ptr; +@dots{} +ptr = (struct foo *) malloc (sizeof (struct foo)); +if (ptr == 0) abort (); +memset (ptr, 0, sizeof (struct foo)); +@end smallexample + +You can store the result of @code{malloc} into any pointer variable +without a cast, because @w{ISO C} automatically converts the type +@code{void *} to another type of pointer when necessary. But the cast +is necessary in contexts other than assignment operators or if you might +want your code to run in traditional C. + +Remember that when allocating space for a string, the argument to +@code{malloc} must be one plus the length of the string. This is +because a string is terminated with a null character that doesn't count +in the ``length'' of the string but does need space. 
For example: + +@smallexample +char *ptr; +@dots{} +ptr = (char *) malloc (length + 1); +@end smallexample + +@noindent +@xref{Representation of Strings}, for more information about this. + +@node Malloc Examples +@subsubsection Examples of @code{malloc} + +If no more space is available, @code{malloc} returns a null pointer. +You should check the value of @emph{every} call to @code{malloc}. It is +useful to write a subroutine that calls @code{malloc} and reports an +error if the value is a null pointer, returning only if the value is +nonzero. This function is conventionally called @code{xmalloc}. Here +it is: + +@smallexample +void * +xmalloc (size_t size) +@{ + void *value = malloc (size); + if (value == 0) + fatal ("virtual memory exhausted"); + return value; +@} +@end smallexample + +Here is a real example of using @code{malloc} (by way of @code{xmalloc}). +The function @code{savestring} will copy a sequence of characters into +a newly allocated null-terminated string: + +@smallexample +@group +char * +savestring (const char *ptr, size_t len) +@{ + char *value = (char *) xmalloc (len + 1); + value[len] = '\0'; + return (char *) memcpy (value, ptr, len); +@} +@end group +@end smallexample + +The block that @code{malloc} gives you is guaranteed to be aligned so +that it can hold any type of data. On @gnusystems{}, the address is +always a multiple of eight on 32-bit systems, and a multiple of 16 on +64-bit systems. Only rarely is any higher boundary (such as a page +boundary) necessary; for those cases, use @code{aligned_alloc} or +@code{posix_memalign} (@pxref{Aligned Memory Blocks}). + +Note that the memory located after the end of the block is likely to be +in use for something else; perhaps a block already allocated by another +call to @code{malloc}. 
If you attempt to treat the block as longer than +you asked for it to be, you are liable to destroy the data that +@code{malloc} uses to keep track of its blocks, or you may destroy the +contents of another block. If you have already allocated a block and +discover you want it to be bigger, use @code{realloc} (@pxref{Changing +Block Size}). + +@node Freeing after Malloc +@subsubsection Freeing Memory Allocated with @code{malloc} +@cindex freeing memory allocated with @code{malloc} +@cindex heap, freeing memory from + +When you no longer need a block that you got with @code{malloc}, use the +function @code{free} to make the block available to be allocated again. +The prototype for this function is in @file{stdlib.h}. +@pindex stdlib.h + +@comment malloc.h stdlib.h +@comment ISO +@deftypefun void free (void *@var{ptr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c __libc_free @asulock @aculock @acsfd @acsmem +@c releasing memory into fastbins modifies the arena without taking +@c its mutex, but catomic operations ensure safety. If two (or more) +@c threads are running malloc and have their own arenas locked when +@c each gets a signal whose handler free()s large (non-fastbin-able) +@c blocks from each other's arena, we deadlock; this is a more general +@c case of @asulock. +@c *__free_hook unguarded +@c mem2chunk ok +@c chunk_is_mmapped ok, chunk bits not modified after allocation +@c chunksize ok +@c munmap_chunk dup @acsmem +@c arena_for_chunk dup ok +@c _int_free (!have_lock) dup @asulock @aculock @acsfd @acsmem +The @code{free} function deallocates the block of memory pointed at +by @var{ptr}. +@end deftypefun + +Freeing a block alters the contents of the block. @strong{Do not expect to +find any data (such as a pointer to the next block in a chain of blocks) in +the block after freeing it.} Copy whatever you need out of the block before +freeing it! 
Here is an example of the proper way to free all the blocks in +a chain, and the strings that they point to: + +@smallexample +struct chain + @{ + struct chain *next; + char *name; + @}; + +void +free_chain (struct chain *chain) +@{ + while (chain != 0) + @{ + struct chain *next = chain->next; + free (chain->name); + free (chain); + chain = next; + @} +@} +@end smallexample + +Occasionally, @code{free} can actually return memory to the operating +system and make the process smaller. Usually, all it can do is allow a +later call to @code{malloc} to reuse the space. In the meantime, the +space remains in your program as part of a free-list used internally by +@code{malloc}. + +There is no point in freeing blocks at the end of a program, because all +of the program's space is given back to the system when the process +terminates. + +@node Changing Block Size +@subsubsection Changing the Size of a Block +@cindex changing the size of a block (@code{malloc}) + +Often you do not know for certain how big a block you will ultimately need +at the time you must begin to use the block. For example, the block might +be a buffer that you use to hold a line being read from a file; no matter +how long you make the buffer initially, you may encounter a line that is +longer. + +You can make the block longer by calling @code{realloc} or +@code{reallocarray}. These functions are declared in @file{stdlib.h}. +@pindex stdlib.h + +@comment malloc.h stdlib.h +@comment ISO +@deftypefun {void *} realloc (void *@var{ptr}, size_t @var{newsize}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c It may call the implementations of malloc and free, so all of their +@c issues arise, plus the realloc hook, also accessed without guards.
+ +@c __libc_realloc @asulock @aculock @acsfd @acsmem +@c *__realloc_hook unguarded +@c __libc_free dup @asulock @aculock @acsfd @acsmem +@c __libc_malloc dup @asulock @aculock @acsfd @acsmem +@c mem2chunk dup ok +@c chunksize dup ok +@c malloc_printerr dup ok +@c checked_request2size dup ok +@c chunk_is_mmapped dup ok +@c mremap_chunk +@c chunksize dup ok +@c __mremap ok +@c set_head dup ok +@c MALLOC_COPY ok +@c memcpy ok +@c munmap_chunk dup @acsmem +@c arena_for_chunk dup ok +@c mutex_lock (arena mutex) dup @asulock @aculock +@c _int_realloc @acsfd @acsmem +@c malloc_printerr dup ok +@c check_inuse_chunk dup ok/disabled +@c chunk_at_offset dup ok +@c chunksize dup ok +@c set_head_size dup ok +@c chunk_at_offset dup ok +@c set_head dup ok +@c chunk2mem dup ok +@c inuse dup ok +@c unlink dup ok +@c _int_malloc dup @acsfd @acsmem +@c mem2chunk dup ok +@c MALLOC_COPY dup ok +@c _int_free (have_lock) dup @acsfd @acsmem +@c set_inuse_bit_at_offset dup ok +@c set_head dup ok +@c mutex_unlock (arena mutex) dup @aculock +@c _int_free (!have_lock) dup @asulock @aculock @acsfd @acsmem + +The @code{realloc} function changes the size of the block whose address is +@var{ptr} to be @var{newsize}. + +Since the space after the end of the block may be in use, @code{realloc} +may find it necessary to copy the block to a new address where more free +space is available. The value of @code{realloc} is the new address of the +block. If the block needs to be moved, @code{realloc} copies the old +contents. + +If you pass a null pointer for @var{ptr}, @code{realloc} behaves just +like @samp{malloc (@var{newsize})}. This can be convenient, but beware +that older implementations (before @w{ISO C}) may not support this +behavior, and will probably crash when @code{realloc} is passed a null +pointer. 
+@end deftypefun + +@comment malloc.h stdlib.h +@comment BSD +@deftypefun {void *} reallocarray (void *@var{ptr}, size_t @var{nmemb}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} + +The @code{reallocarray} function changes the size of the block whose address +is @var{ptr} to be long enough to contain a vector of @var{nmemb} elements, +each of size @var{size}. It is equivalent to @samp{realloc (@var{ptr}, +@var{nmemb} * @var{size})}, except that @code{reallocarray} fails safely if +the multiplication overflows, by setting @code{errno} to @code{ENOMEM}, +returning a null pointer, and leaving the original block unchanged. + +@code{reallocarray} should be used instead of @code{realloc} when the new size +of the allocated block is the result of a multiplication that might overflow. + +@strong{Portability Note:} This function is not part of any standard. It was +first introduced in OpenBSD 5.6. +@end deftypefun + +Like @code{malloc}, @code{realloc} and @code{reallocarray} may return a null +pointer if no memory space is available to make the block bigger. When this +happens, the original block is untouched; it has not been modified or +relocated. + +In most cases it makes no difference what happens to the original block +when @code{realloc} fails, because the application program cannot continue +when it is out of memory, and the only thing to do is to give a fatal error +message. Often it is convenient to write and use a subroutine, +conventionally called @code{xrealloc}, that takes care of the error message +as @code{xmalloc} does for @code{malloc}: + +@smallexample +void * +xrealloc (void *ptr, size_t size) +@{ + void *value = realloc (ptr, size); + if (value == 0) + fatal ("Virtual memory exhausted"); + return value; +@} +@end smallexample + +You can also use @code{realloc} or @code{reallocarray} to make a block +smaller. 
The reason you would do this is to avoid tying up a lot of memory +space when only a little is needed. +@comment The following is no longer true with the new malloc. +@comment But it seems wise to keep the warning for other implementations. +In several allocation implementations, making a block smaller sometimes +necessitates copying it, so it can fail if no other space is available. + +If the new size you specify is the same as the old size, @code{realloc} and +@code{reallocarray} are guaranteed to change nothing and return the same +address that you gave. + +@node Allocating Cleared Space +@subsubsection Allocating Cleared Space + +The function @code{calloc} allocates memory and clears it to zero. It +is declared in @file{stdlib.h}. +@pindex stdlib.h + +@comment malloc.h stdlib.h +@comment ISO +@deftypefun {void *} calloc (size_t @var{count}, size_t @var{eltsize}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c Same caveats as malloc. + +@c __libc_calloc @asulock @aculock @acsfd @acsmem +@c *__malloc_hook dup unguarded +@c memset dup ok +@c arena_get @asulock @aculock @acsfd @acsmem +@c arena_lock dup @asulock @aculock @acsfd @acsmem +@c top dup ok +@c chunksize dup ok +@c heap_for_ptr dup ok +@c _int_malloc dup @acsfd @acsmem +@c arena_get_retry dup @asulock @aculock @acsfd @acsmem +@c mutex_unlock dup @aculock +@c mem2chunk dup ok +@c chunk_is_mmapped dup ok +@c MALLOC_ZERO ok +@c memset dup ok +This function allocates a block long enough to contain a vector of +@var{count} elements, each of size @var{eltsize}. Its contents are +cleared to zero before @code{calloc} returns. 
+@end deftypefun + +You could define @code{calloc} as follows: + +@smallexample +void * +calloc (size_t count, size_t eltsize) +@{ + size_t size = count * eltsize; + void *value = malloc (size); + if (value != 0) + memset (value, 0, size); + return value; +@} +@end smallexample + +But in general, it is not guaranteed that @code{calloc} calls +@code{malloc} internally. Therefore, if an application provides its own +@code{malloc}/@code{realloc}/@code{free} outside the C library, it +should always define @code{calloc}, too. + +@node Aligned Memory Blocks +@subsubsection Allocating Aligned Memory Blocks + +@cindex page boundary +@cindex alignment (with @code{malloc}) +@pindex stdlib.h +The address of a block returned by @code{malloc} or @code{realloc} in +@gnusystems{} is always a multiple of eight (or sixteen on 64-bit +systems). If you need a block whose address is a multiple of a higher +power of two than that, use @code{aligned_alloc} or @code{posix_memalign}. +@code{aligned_alloc} and @code{posix_memalign} are declared in +@file{stdlib.h}. + +@comment stdlib.h +@deftypefun {void *} aligned_alloc (size_t @var{alignment}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c Alias to memalign. +The @code{aligned_alloc} function allocates a block of @var{size} bytes whose +address is a multiple of @var{alignment}. The @var{alignment} must be a +power of two and @var{size} must be a multiple of @var{alignment}. + +The @code{aligned_alloc} function returns a null pointer on error and sets +@code{errno} to one of the following values: + +@table @code +@item ENOMEM +There was insufficient memory available to satisfy the request. + +@item EINVAL +@var{alignment} is not a power of two. +@end table + +This function was introduced in @w{ISO C11} and hence may have better +portability to modern non-POSIX systems than @code{posix_memalign}.
+ +@end deftypefun + +@comment malloc.h +@comment BSD +@deftypefun {void *} memalign (size_t @var{boundary}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c Same issues as malloc. The padding bytes are safely freed in +@c _int_memalign, with the arena still locked. + +@c __libc_memalign @asulock @aculock @acsfd @acsmem +@c *__memalign_hook dup unguarded +@c __libc_malloc dup @asulock @aculock @acsfd @acsmem +@c arena_get dup @asulock @aculock @acsfd @acsmem +@c _int_memalign @acsfd @acsmem +@c _int_malloc dup @acsfd @acsmem +@c checked_request2size dup ok +@c mem2chunk dup ok +@c chunksize dup ok +@c chunk_is_mmapped dup ok +@c set_head dup ok +@c chunk2mem dup ok +@c set_inuse_bit_at_offset dup ok +@c set_head_size dup ok +@c _int_free (have_lock) dup @acsfd @acsmem +@c chunk_at_offset dup ok +@c check_inuse_chunk dup ok +@c arena_get_retry dup @asulock @aculock @acsfd @acsmem +@c mutex_unlock dup @aculock +The @code{memalign} function allocates a block of @var{size} bytes whose +address is a multiple of @var{boundary}. The @var{boundary} must be a +power of two! The function @code{memalign} works by allocating a +somewhat larger block, and then returning an address within the block +that is on the specified boundary. + +The @code{memalign} function returns a null pointer on error and sets +@code{errno} to one of the following values: + +@table @code +@item ENOMEM +There was insufficient memory available to satisfy the request. + +@item EINVAL +@var{boundary} is not a power of two. + +@end table + +The @code{memalign} function is obsolete and @code{aligned_alloc} or +@code{posix_memalign} should be used instead.
+@end deftypefun + +@comment stdlib.h +@comment POSIX +@deftypefun int posix_memalign (void **@var{memptr}, size_t @var{alignment}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c Calls memalign unless the requirements are not met (powerof2 macro is +@c safe given an automatic variable as an argument) or there's a +@c memalign hook (accessed unguarded, but safely). +The @code{posix_memalign} function is similar to the @code{memalign} +function in that it returns a buffer of @var{size} bytes aligned to a +multiple of @var{alignment}. But it adds one requirement to the +parameter @var{alignment}: the value must be a power of two multiple of +@code{sizeof (void *)}. + +If the function succeeds in allocating memory, a pointer to the allocated +memory is returned in @code{*@var{memptr}} and the return value is zero. +Otherwise the function returns an error value indicating the problem. +The possible error values returned are: + +@table @code +@item ENOMEM +There was insufficient memory available to satisfy the request. + +@item EINVAL +@var{alignment} is not a power of two multiple of @code{sizeof (void *)}. + +@end table + +This function was introduced in POSIX 1003.1d. Although this function is +superseded by @code{aligned_alloc}, it is more portable to older POSIX +systems that do not support @w{ISO C11}. +@end deftypefun + +@comment malloc.h stdlib.h +@comment BSD +@deftypefun {void *} valloc (size_t @var{size}) +@safety{@prelim{}@mtunsafe{@mtuinit{}}@asunsafe{@asuinit{} @asulock{}}@acunsafe{@acuinit{} @aculock{} @acsfd{} @acsmem{}}} +@c __libc_valloc @mtuinit @asuinit @asulock @aculock @acsfd @acsmem +@c ptmalloc_init (once) @mtsenv @asulock @aculock @acsfd @acsmem +@c _dl_addr @asucorrupt? @aculock +@c __rtld_lock_lock_recursive (dl_load_lock) @asucorrupt?
@aculock +@c _dl_find_dso_for_object ok, iterates over dl_ns and its _ns_loaded objs +@c the ok above assumes no partial updates on dl_ns and _ns_loaded +@c that could confuse a _dl_addr call in a signal handler +@c _dl_addr_inside_object ok +@c determine_info ok +@c __rtld_lock_unlock_recursive (dl_load_lock) @aculock +@c *_environ @mtsenv +@c next_env_entry ok +@c strcspn dup ok +@c __libc_mallopt dup @mtasuconst:mallopt [setting mp_] +@c __malloc_check_init @mtasuconst:malloc_hooks [setting hooks] +@c *__malloc_initialize_hook unguarded, ok +@c *__memalign_hook dup ok, unguarded +@c arena_get dup @asulock @aculock @acsfd @acsmem +@c _int_valloc @acsfd @acsmem +@c malloc_consolidate dup ok +@c _int_memalign dup @acsfd @acsmem +@c arena_get_retry dup @asulock @aculock @acsfd @acsmem +@c _int_memalign dup @acsfd @acsmem +@c mutex_unlock dup @aculock +Using @code{valloc} is like using @code{memalign} and passing the page size +as the value of the first argument. It is implemented like this: + +@smallexample +void * +valloc (size_t size) +@{ + return memalign (getpagesize (), size); +@} +@end smallexample + +@xref{Query Memory Parameters}, for more information about the memory +subsystem. + +The @code{valloc} function is obsolete and @code{aligned_alloc} or +@code{posix_memalign} should be used instead. +@end deftypefun + +@node Malloc Tunable Parameters +@subsubsection Malloc Tunable Parameters + +You can adjust some parameters for dynamic memory allocation with the +@code{mallopt} function. This function is the general SVID/XPG +interface, defined in @file{malloc.h}.
+@pindex malloc.h + +@deftypefun int mallopt (int @var{param}, int @var{value}) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasuconst{:mallopt}}@asunsafe{@asuinit{} @asulock{}}@acunsafe{@acuinit{} @aculock{}}} +@c __libc_mallopt @mtuinit @mtasuconst:mallopt @asuinit @asulock @aculock +@c ptmalloc_init (once) dup @mtsenv @asulock @aculock @acsfd @acsmem +@c mutex_lock (main_arena->mutex) @asulock @aculock +@c malloc_consolidate dup ok +@c set_max_fast ok +@c mutex_unlock dup @aculock + +When calling @code{mallopt}, the @var{param} argument specifies the +parameter to be set, and @var{value} the new value to be set. Possible +choices for @var{param}, as defined in @file{malloc.h}, are: + +@comment TODO: @item M_CHECK_ACTION +@vtable @code +@item M_MMAP_MAX +The maximum number of chunks to allocate with @code{mmap}. Setting this +to zero disables all use of @code{mmap}. + +The default value of this parameter is @code{65536}. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_MMAP_MAX_} to the desired value. + +@item M_MMAP_THRESHOLD +All chunks larger than this value are allocated outside the normal +heap, using the @code{mmap} system call. This way it is guaranteed +that the memory for these chunks can be returned to the system on +@code{free}. Note that requests smaller than this threshold might still +be allocated via @code{mmap}. + +If this parameter is not set, the default value is set as 128 KiB and the +threshold is adjusted dynamically to suit the allocation patterns of the +program. If the parameter is set, the dynamic adjustment is disabled and the +value is set statically to the input value. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_MMAP_THRESHOLD_} to the desired value. 
+@comment TODO: @item M_MXFAST + +@item M_PERTURB +If non-zero, memory blocks are filled with values depending on some +low order bits of this parameter when they are allocated (except when +allocated by @code{calloc}) and freed. This can be used to debug the +use of uninitialized or freed heap memory. Note that this option does not +guarantee that the freed block will have any specific values. It only +guarantees that the content the block had before it was freed will be +overwritten. + +The default value of this parameter is @code{0}. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_PERTURB_} to the desired value. + +@item M_TOP_PAD +This parameter determines the amount of extra memory to obtain from the system +when an arena needs to be extended. It also specifies the number of bytes to +retain when shrinking an arena. This provides the necessary hysteresis in heap +size such that excessive amounts of system calls can be avoided. + +The default value of this parameter is @code{0}. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_TOP_PAD_} to the desired value. + +@item M_TRIM_THRESHOLD +This is the minimum size (in bytes) of the top-most, releasable chunk +that will trigger a system call in order to return memory to the system. + +If this parameter is not set, the default value is set as 128 KiB and the +threshold is adjusted dynamically to suit the allocation patterns of the +program. If the parameter is set, the dynamic adjustment is disabled and the +value is set statically to the provided input. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_TRIM_THRESHOLD_} to the desired value. + +@item M_ARENA_TEST +This parameter specifies the number of arenas that can be created before the +test on the limit to the number of arenas is conducted.
The value is ignored if +@code{M_ARENA_MAX} is set. + +The default value of this parameter is 2 on 32-bit systems and 8 on 64-bit +systems. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_ARENA_TEST} to the desired value. + +@item M_ARENA_MAX +This parameter sets the number of arenas to use regardless of the number of +cores in the system. + +The default value of this tunable is @code{0}, meaning that the limit on the +number of arenas is determined by the number of CPU cores online. For 32-bit +systems the limit is twice the number of cores online and on 64-bit systems, it +is eight times the number of cores online. Note that the default value is not +derived from the default value of M_ARENA_TEST and is computed independently. + +This parameter can also be set for the process at startup by setting the +environment variable @env{MALLOC_ARENA_MAX} to the desired value. +@end vtable + +@end deftypefun + +@node Heap Consistency Checking +@subsubsection Heap Consistency Checking + +@cindex heap consistency checking +@cindex consistency checking, of heap + +You can ask @code{malloc} to check the consistency of dynamic memory by +using the @code{mcheck} function. This function is a GNU extension, +declared in @file{mcheck.h}. +@pindex mcheck.h + +@comment mcheck.h +@comment GNU +@deftypefun int mcheck (void (*@var{abortfn}) (enum mcheck_status @var{status})) +@safety{@prelim{}@mtunsafe{@mtasurace{:mcheck} @mtasuconst{:malloc_hooks}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +@c The hooks must be set up before malloc is first used, which sort of +@c implies @mtuinit/@asuinit but since the function is a no-op if malloc +@c was already used, that doesn't pose any safety issues. 
The actual +@c problem is with the hooks, designed for single-threaded +@c fully-synchronous operation: they manage an unguarded linked list of +@c allocated blocks, and get temporarily overwritten before calling the +@c allocation functions recursively while holding the old hooks. There +@c are no guards for thread safety, and inconsistent hooks may be found +@c within signal handlers or left behind in case of cancellation. + +Calling @code{mcheck} tells @code{malloc} to perform occasional +consistency checks. These will catch things such as writing +past the end of a block that was allocated with @code{malloc}. + +The @var{abortfn} argument is the function to call when an inconsistency +is found. If you supply a null pointer, then @code{mcheck} uses a +default function which prints a message and calls @code{abort} +(@pxref{Aborting a Program}). The function you supply is called with +one argument, which says what sort of inconsistency was detected; its +type is described below. + +It is too late to begin allocation checking once you have allocated +anything with @code{malloc}. So @code{mcheck} does nothing in that +case. The function returns @code{-1} if you call it too late, and +@code{0} otherwise (when it is successful). + +The easiest way to arrange to call @code{mcheck} early enough is to use +the option @samp{-lmcheck} when you link your program; then you don't +need to modify your program source at all. Alternatively you might use +a debugger to insert a call to @code{mcheck} whenever the program is +started, for example these gdb commands will automatically call @code{mcheck} +whenever the program starts: + +@smallexample +(gdb) break main +Breakpoint 1, main (argc=2, argv=0xbffff964) at whatever.c:10 +(gdb) command 1 +Type commands for when breakpoint 1 is hit, one per line. +End with a line saying just "end". 
+>call mcheck(0) +>continue +>end +(gdb) @dots{} +@end smallexample + +This will however only work if no initialization function of any object +involved calls any of the @code{malloc} functions since @code{mcheck} +must be called before the first such function. + +@end deftypefun + +@deftypefun {enum mcheck_status} mprobe (void *@var{pointer}) +@safety{@prelim{}@mtunsafe{@mtasurace{:mcheck} @mtasuconst{:malloc_hooks}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +@c The linked list of headers may be modified concurrently by other +@c threads, and it may find a partial update if called from a signal +@c handler. It's mostly read only, so cancelling it might be safe, but +@c it will modify global state that, if cancellation hits at just the +@c right spot, may be left behind inconsistent. This path is only taken +@c if checkhdr finds an inconsistency. If the inconsistency could only +@c occur because of earlier undefined behavior, that wouldn't be an +@c additional safety issue problem, but because of the other concurrency +@c issues in the mcheck hooks, the apparent inconsistency could be the +@c result of mcheck's own internal data race. So, AC-Unsafe it is. + +The @code{mprobe} function lets you explicitly check for inconsistencies +in a particular allocated block. You must have already called +@code{mcheck} at the beginning of the program, to do its occasional +checks; calling @code{mprobe} requests an additional consistency check +to be done at the time of the call. + +The argument @var{pointer} must be a pointer returned by @code{malloc} +or @code{realloc}. @code{mprobe} returns a value that says what +inconsistency, if any, was found. The values are described below. +@end deftypefun + +@deftp {Data Type} {enum mcheck_status} +This enumerated type describes what kind of inconsistency was detected +in an allocated block, if any. Here are the possible values: + +@table @code +@item MCHECK_DISABLED +@code{mcheck} was not called before the first allocation. 
+No consistency checking can be done. +@item MCHECK_OK +No inconsistency detected. +@item MCHECK_HEAD +The data immediately before the block was modified. +This commonly happens when an array index or pointer +is decremented too far. +@item MCHECK_TAIL +The data immediately after the block was modified. +This commonly happens when an array index or pointer +is incremented too far. +@item MCHECK_FREE +The block was already freed. +@end table +@end deftp + +Another possibility to check for and guard against bugs in the use of +@code{malloc}, @code{realloc} and @code{free} is to set the environment +variable @code{MALLOC_CHECK_}. When @code{MALLOC_CHECK_} is set, a +special (less efficient) implementation is used which is designed to be +tolerant against simple errors, such as double calls of @code{free} with +the same argument, or overruns of a single byte (off-by-one bugs). Not +all such errors can be protected against, however, and memory leaks can +result. If @code{MALLOC_CHECK_} is set to @code{0}, any detected heap +corruption is silently ignored; if set to @code{1}, a diagnostic is +printed on @code{stderr}; if set to @code{2}, @code{abort} is called +immediately. This can be useful because otherwise a crash may happen +much later, and the true cause for the problem is then very hard to +track down. + +There is one problem with @code{MALLOC_CHECK_}: in SUID or SGID binaries +it could possibly be exploited since diverging from the normal program's +behavior it now writes something to the standard error descriptor. +Therefore the use of @code{MALLOC_CHECK_} is disabled by default for +SUID and SGID binaries. It can be enabled again by the system +administrator by adding a file @file{/etc/suid-debug} (the content is +not important; it could be empty). + +So, what's the difference between using @code{MALLOC_CHECK_} and linking +with @samp{-lmcheck}? @code{MALLOC_CHECK_} is orthogonal with respect to +@samp{-lmcheck}.
@samp{-lmcheck} has been added for backward +compatibility. Both @code{MALLOC_CHECK_} and @samp{-lmcheck} should +uncover the same bugs - but using @code{MALLOC_CHECK_} you don't need to +recompile your application. + +@node Hooks for Malloc +@subsubsection Memory Allocation Hooks +@cindex allocation hooks, for @code{malloc} + +@Theglibc{} lets you modify the behavior of @code{malloc}, +@code{realloc}, and @code{free} by specifying appropriate hook +functions. You can use these hooks to help you debug programs that use +dynamic memory allocation, for example. + +The hook variables are declared in @file{malloc.h}. +@pindex malloc.h + +@comment malloc.h +@comment GNU +@defvar __malloc_hook +The value of this variable is a pointer to the function that +@code{malloc} uses whenever it is called. You should define this +function to look like @code{malloc}; that is, like: + +@smallexample +void *@var{function} (size_t @var{size}, const void *@var{caller}) +@end smallexample + +The value of @var{caller} is the return address found on the stack when +the @code{malloc} function was called. This value allows you to trace +the memory consumption of the program. +@end defvar + +@comment malloc.h +@comment GNU +@defvar __realloc_hook +The value of this variable is a pointer to function that @code{realloc} +uses whenever it is called. You should define this function to look +like @code{realloc}; that is, like: + +@smallexample +void *@var{function} (void *@var{ptr}, size_t @var{size}, const void *@var{caller}) +@end smallexample + +The value of @var{caller} is the return address found on the stack when +the @code{realloc} function was called. This value allows you to trace the +memory consumption of the program. +@end defvar + +@comment malloc.h +@comment GNU +@defvar __free_hook +The value of this variable is a pointer to function that @code{free} +uses whenever it is called. 
You should define this function to look +like @code{free}; that is, like: + +@smallexample +void @var{function} (void *@var{ptr}, const void *@var{caller}) +@end smallexample + +The value of @var{caller} is the return address found on the stack when +the @code{free} function was called. This value allows you to trace the +memory consumption of the program. +@end defvar + +@comment malloc.h +@comment GNU +@defvar __memalign_hook +The value of this variable is a pointer to function that @code{aligned_alloc}, +@code{memalign}, @code{posix_memalign} and @code{valloc} use whenever they +are called. You should define this function to look like @code{aligned_alloc}; +that is, like: + +@smallexample +void *@var{function} (size_t @var{alignment}, size_t @var{size}, const void *@var{caller}) +@end smallexample + +The value of @var{caller} is the return address found on the stack when +the @code{aligned_alloc}, @code{memalign}, @code{posix_memalign} or +@code{valloc} functions are called. This value allows you to trace the +memory consumption of the program. +@end defvar + +You must make sure that the function you install as a hook for one of +these functions does not call that function recursively without restoring +the old value of the hook first! Otherwise, your program will get stuck +in an infinite recursion. Before calling the function recursively, one +should make sure to restore all the hooks to their previous value. When +coming back from the recursive call, all the hooks should be resaved +since a hook might modify itself. + +An issue to look out for is the time at which the malloc hook functions +can be safely installed. If the hook functions call the malloc-related +functions recursively, it is necessary that malloc has already properly +initialized itself at the time when @code{__malloc_hook} etc. is +assigned to. 
On the other hand, if the hook functions provide a +complete malloc implementation of their own, it is vital that the hooks +are assigned to @emph{before} the very first @code{malloc} call has +completed, because otherwise a chunk obtained from the ordinary, +un-hooked malloc may later be handed to @code{__free_hook}, for example. + +Here is an example showing how to use @code{__malloc_hook} and +@code{__free_hook} properly. It installs a function that prints out +information every time @code{malloc} or @code{free} is called. We just +assume here that @code{realloc} and @code{memalign} are not used in our +program. + +@smallexample +/* Prototypes for __malloc_hook, __free_hook */ +#include <malloc.h> + +/* Prototypes for our hooks. */ +static void my_init (void); +static void *my_malloc_hook (size_t, const void *); +static void my_free_hook (void*, const void *); + +static void +my_init (void) +@{ + old_malloc_hook = __malloc_hook; + old_free_hook = __free_hook; + __malloc_hook = my_malloc_hook; + __free_hook = my_free_hook; +@} + +static void * +my_malloc_hook (size_t size, const void *caller) +@{ + void *result; + /* Restore all old hooks */ + __malloc_hook = old_malloc_hook; + __free_hook = old_free_hook; + /* Call recursively */ + result = malloc (size); + /* Save underlying hooks */ + old_malloc_hook = __malloc_hook; + old_free_hook = __free_hook; + /* @r{@code{printf} might call @code{malloc}, so protect it too.} */ + printf ("malloc (%u) returns %p\n", (unsigned int) size, result); + /* Restore our own hooks */ + __malloc_hook = my_malloc_hook; + __free_hook = my_free_hook; + return result; +@} + +static void +my_free_hook (void *ptr, const void *caller) +@{ + /* Restore all old hooks */ + __malloc_hook = old_malloc_hook; + __free_hook = old_free_hook; + /* Call recursively */ + free (ptr); + /* Save underlying hooks */ + old_malloc_hook = __malloc_hook; + old_free_hook = __free_hook; + /* @r{@code{printf} might call @code{free}, so protect it too.} */
+ printf ("freed pointer %p\n", ptr); + /* Restore our own hooks */ + __malloc_hook = my_malloc_hook; + __free_hook = my_free_hook; +@} + +main () +@{ + my_init (); + @dots{} +@} +@end smallexample + +The @code{mcheck} function (@pxref{Heap Consistency Checking}) works by +installing such hooks. + +@c __morecore, __after_morecore_hook are undocumented +@c It's not clear whether to document them. + +@node Statistics of Malloc +@subsubsection Statistics for Memory Allocation with @code{malloc} + +@cindex allocation statistics +You can get information about dynamic memory allocation by calling the +@code{mallinfo} function. This function and its associated data type +are declared in @file{malloc.h}; they are an extension of the standard +SVID/XPG version. +@pindex malloc.h + +@comment malloc.h +@comment GNU +@deftp {Data Type} {struct mallinfo} +This structure type is used to return information about the dynamic +memory allocator. It contains the following members: + +@table @code +@item int arena +This is the total size of memory allocated with @code{sbrk} by +@code{malloc}, in bytes. + +@item int ordblks +This is the number of chunks not in use. (The memory allocator +internally gets chunks of memory from the operating system, and then +carves them up to satisfy individual @code{malloc} requests; +@pxref{The GNU Allocator}.) + +@item int smblks +This field is unused. + +@item int hblks +This is the total number of chunks allocated with @code{mmap}. + +@item int hblkhd +This is the total size of memory allocated with @code{mmap}, in bytes. + +@item int usmblks +This field is unused and always 0. + +@item int fsmblks +This field is unused. + +@item int uordblks +This is the total size of memory occupied by chunks handed out by +@code{malloc}. + +@item int fordblks +This is the total size of memory occupied by free (not in use) chunks. 
+ +@item int keepcost +This is the size of the top-most releasable chunk that normally +borders the end of the heap (i.e., the high end of the virtual address +space's data segment). + +@end table +@end deftp + +@comment malloc.h +@comment SVID +@deftypefun {struct mallinfo} mallinfo (void) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasuconst{:mallopt}}@asunsafe{@asuinit{} @asulock{}}@acunsafe{@acuinit{} @aculock{}}} +@c Accessing mp_.n_mmaps and mp_.max_mmapped_mem, modified with atomics +@c but non-atomically elsewhere, may get us inconsistent results. We +@c mark the statistics as unsafe, rather than the fast-path functions +@c that collect the possibly inconsistent data. + +@c __libc_mallinfo @mtuinit @mtasuconst:mallopt @asuinit @asulock @aculock +@c ptmalloc_init (once) dup @mtsenv @asulock @aculock @acsfd @acsmem +@c mutex_lock dup @asulock @aculock +@c int_mallinfo @mtasuconst:mallopt [mp_ access on main_arena] +@c malloc_consolidate dup ok +@c check_malloc_state dup ok/disabled +@c chunksize dup ok +@c fastbin dupo ok +@c bin_at dup ok +@c last dup ok +@c mutex_unlock @aculock + +This function returns information about the current dynamic memory usage +in a structure of type @code{struct mallinfo}. +@end deftypefun + +@node Summary of Malloc +@subsubsection Summary of @code{malloc}-Related Functions + +Here is a summary of the functions that work with @code{malloc}: + +@table @code +@item void *malloc (size_t @var{size}) +Allocate a block of @var{size} bytes. @xref{Basic Allocation}. + +@item void free (void *@var{addr}) +Free a block previously allocated by @code{malloc}. @xref{Freeing after +Malloc}. + +@item void *realloc (void *@var{addr}, size_t @var{size}) +Make a block previously allocated by @code{malloc} larger or smaller, +possibly by copying it to a new location. @xref{Changing Block Size}. 
+ +@item void *reallocarray (void *@var{ptr}, size_t @var{nmemb}, size_t @var{size}) +Change the size of a block previously allocated by @code{malloc} to +@code{@var{nmemb} * @var{size}} bytes as with @code{realloc}. @xref{Changing +Block Size}. + +@item void *calloc (size_t @var{count}, size_t @var{eltsize}) +Allocate a block of @var{count} * @var{eltsize} bytes using +@code{malloc}, and set its contents to zero. @xref{Allocating Cleared +Space}. + +@item void *valloc (size_t @var{size}) +Allocate a block of @var{size} bytes, starting on a page boundary. +@xref{Aligned Memory Blocks}. + +@item void *aligned_alloc (size_t @var{alignment}, size_t @var{size}) +Allocate a block of @var{size} bytes, starting on an address that is a +multiple of @var{alignment}. @xref{Aligned Memory Blocks}. + +@item int posix_memalign (void **@var{memptr}, size_t @var{alignment}, size_t @var{size}) +Allocate a block of @var{size} bytes, starting on an address that is a +multiple of @var{alignment}. @xref{Aligned Memory Blocks}. + +@item void *memalign (size_t @var{boundary}, size_t @var{size}) +Allocate a block of @var{size} bytes, starting on an address that is a +multiple of @var{boundary}. @xref{Aligned Memory Blocks}. + +@item int mallopt (int @var{param}, int @var{value}) +Adjust a tunable parameter. @xref{Malloc Tunable Parameters}. + +@item int mcheck (void (*@var{abortfn}) (void)) +Tell @code{malloc} to perform occasional consistency checks on +dynamically allocated memory, and to call @var{abortfn} when an +inconsistency is found. @xref{Heap Consistency Checking}. + +@item void *(*__malloc_hook) (size_t @var{size}, const void *@var{caller}) +A pointer to a function that @code{malloc} uses whenever it is called. + +@item void *(*__realloc_hook) (void *@var{ptr}, size_t @var{size}, const void *@var{caller}) +A pointer to a function that @code{realloc} uses whenever it is called.
+ +@item void (*__free_hook) (void *@var{ptr}, const void *@var{caller}) +A pointer to a function that @code{free} uses whenever it is called. + +@item void *(*__memalign_hook) (size_t @var{alignment}, size_t @var{size}, const void *@var{caller}) +A pointer to a function that @code{aligned_alloc}, @code{memalign}, +@code{posix_memalign} and @code{valloc} use whenever they are called. + +@item struct mallinfo mallinfo (void) +Return information about the current dynamic memory usage. +@xref{Statistics of Malloc}. +@end table + +@node Allocation Debugging +@subsection Allocation Debugging +@cindex allocation debugging +@cindex malloc debugger + +A complicated task when programming with languages which do not use +garbage collected dynamic memory allocation is to find memory leaks. +Long running programs must ensure that dynamically allocated objects are +freed at the end of their lifetime. If this does not happen the system +runs out of memory, sooner or later. + +The @code{malloc} implementation in @theglibc{} provides some +simple means to detect such leaks and obtain some information to find +the location. To do this the application must be started in a special +mode which is enabled by an environment variable. There are no speed +penalties for the program if the debugging mode is not enabled. + +@menu +* Tracing malloc:: How to install the tracing functionality. +* Using the Memory Debugger:: Example program excerpts. +* Tips for the Memory Debugger:: Some more or less clever ideas. +* Interpreting the traces:: What do all these lines mean?
+@end menu + +@node Tracing malloc +@subsubsection How to install the tracing functionality + +@comment mcheck.h +@comment GNU +@deftypefun void mtrace (void) +@safety{@prelim{}@mtunsafe{@mtsenv{} @mtasurace{:mtrace} @mtasuconst{:malloc_hooks} @mtuinit{}}@asunsafe{@asuinit{} @ascuheap{} @asucorrupt{} @asulock{}}@acunsafe{@acuinit{} @acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Like the mcheck hooks, these are not designed with thread safety in +@c mind, because the hook pointers are temporarily modified without +@c regard to other threads, signals or cancellation. + +@c mtrace @mtuinit @mtasurace:mtrace @mtsenv @asuinit @ascuheap @asucorrupt @acuinit @acucorrupt @aculock @acsfd @acsmem +@c __libc_secure_getenv dup @mtsenv +@c malloc dup @ascuheap @acsmem +@c fopen dup @ascuheap @asulock @aculock @acsmem @acsfd +@c fcntl dup ok +@c setvbuf dup @aculock +@c fprintf dup (on newly-created stream) @aculock +@c __cxa_atexit (once) dup @asulock @aculock @acsmem +@c free dup @ascuheap @acsmem +When the @code{mtrace} function is called it looks for an environment +variable named @code{MALLOC_TRACE}. This variable is supposed to +contain a valid file name. The user must have write access. If the +file already exists it is truncated. If the environment variable is not +set or it does not name a valid file which can be opened for writing +nothing is done. The behavior of @code{malloc} etc. is not changed. +For obvious reasons this also happens if the application is installed +with the SUID or SGID bit set. + +If the named file is successfully opened, @code{mtrace} installs special +handlers for the functions @code{malloc}, @code{realloc}, and +@code{free} (@pxref{Hooks for Malloc}). From then on, all uses of these +functions are traced and protocolled into the file. There is now of +course a speed penalty for all calls to the traced functions so tracing +should not be enabled during normal use. 
+ +This function is a GNU extension and generally not available on other +systems. The prototype can be found in @file{mcheck.h}. +@end deftypefun + +@comment mcheck.h +@comment GNU +@deftypefun void muntrace (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:mtrace} @mtasuconst{:malloc_hooks} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{} @aculock{} @acsfd{}}} + +@c muntrace @mtasurace:mtrace @mtslocale @asucorrupt @ascuheap @acucorrupt @acsmem @aculock @acsfd +@c fprintf (fputs) dup @mtslocale @asucorrupt @ascuheap @acsmem @aculock @acucorrupt +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +The @code{muntrace} function can be called after @code{mtrace} was used +to enable tracing the @code{malloc} calls. If no (successful) call of +@code{mtrace} was made @code{muntrace} does nothing. + +Otherwise it deinstalls the handlers for @code{malloc}, @code{realloc}, +and @code{free} and then closes the protocol file. No calls are +protocolled anymore and the program runs again at full speed. + +This function is a GNU extension and generally not available on other +systems. The prototype can be found in @file{mcheck.h}. +@end deftypefun + +@node Using the Memory Debugger +@subsubsection Example program excerpts + +Even though the tracing functionality does not influence the runtime +behavior of the program it is not a good idea to call @code{mtrace} in +all programs. Just imagine that you debug a program using @code{mtrace} +and all other programs used in the debugging session also trace their +@code{malloc} calls. The output file would be the same for all programs +and thus is unusable. Therefore one should call @code{mtrace} only if +compiled for debugging. 
A program could therefore start like this: + +@example +#include <mcheck.h> + +int +main (int argc, char *argv[]) +@{ +#ifdef DEBUGGING + mtrace (); +#endif + @dots{} +@} +@end example + +This is all that is needed if you want to trace the calls during the +whole runtime of the program. Alternatively you can stop the tracing at +any time with a call to @code{muntrace}. It is even possible to restart +the tracing again with a new call to @code{mtrace}. But this can cause +unreliable results since there may be calls of the functions which are +not called. Please note that not only the application uses the traced +functions, also libraries (including the C library itself) use these +functions. + +This last point is also why it is not a good idea to call @code{muntrace} +before the program terminates. The libraries are informed about the +termination of the program only after the program returns from +@code{main} or calls @code{exit} and so cannot free the memory they use +before this time. + +So the best thing one can do is to call @code{mtrace} as the very first +function in the program and never call @code{muntrace}. So the program +traces almost all uses of the @code{malloc} functions (except those +calls which are executed by constructors of the program or used +libraries). + +@node Tips for the Memory Debugger +@subsubsection Some more or less clever ideas + +You know the situation. The program is prepared for debugging and in +all debugging sessions it runs well. But once it is started without +debugging the error shows up. A typical example is a memory leak that +becomes visible only when we turn off the debugging. If you foresee +such situations you can still win. 
Simply use something equivalent to +the following little program: + +@example +#include <mcheck.h> +#include <signal.h> + +static void +enable (int sig) +@{ + mtrace (); + signal (SIGUSR1, enable); +@} + +static void +disable (int sig) +@{ + muntrace (); + signal (SIGUSR2, disable); +@} + +int +main (int argc, char *argv[]) +@{ + @dots{} + + signal (SIGUSR1, enable); + signal (SIGUSR2, disable); + + @dots{} +@} +@end example + +I.e., the user can start the memory debugger any time s/he wants if the +program was started with @code{MALLOC_TRACE} set in the environment. +The output will of course not show the allocations which happened before +the first signal but if there is a memory leak this will show up +nevertheless. + +@node Interpreting the traces +@subsubsection Interpreting the traces + +If you take a look at the output it will look similar to this: + +@example += Start +@ [0x8048209] - 0x8064cc8 +@ [0x8048209] - 0x8064ce0 +@ [0x8048209] - 0x8064cf8 +@ [0x80481eb] + 0x8064c48 0x14 +@ [0x80481eb] + 0x8064c60 0x14 +@ [0x80481eb] + 0x8064c78 0x14 +@ [0x80481eb] + 0x8064c90 0x14 += End +@end example + +What this all means is not really important since the trace file is not +meant to be read by a human. Therefore no attention is given to +readability. Instead there is a program which comes with @theglibc{} +which interprets the traces and outputs a summary in a +user-friendly way. The program is called @code{mtrace} (it is in fact a +Perl script) and it takes one or two arguments. In any case the name of +the file with the trace output must be specified. If an optional +argument precedes the name of the trace file this must be the name of +the program which generated the trace. + +@example +drepper$ mtrace tst-mtrace log +No memory leaks. +@end example + +In this case the program @code{tst-mtrace} was run and it produced a +trace file @file{log}.
The message printed by @code{mtrace} shows there +are no problems with the code, all allocated memory was freed +afterwards. + +If we call @code{mtrace} on the example trace given above we would get a +different output: + +@example +drepper$ mtrace errlog +- 0x08064cc8 Free 2 was never alloc'd 0x8048209 +- 0x08064ce0 Free 3 was never alloc'd 0x8048209 +- 0x08064cf8 Free 4 was never alloc'd 0x8048209 + +Memory not freed: +----------------- + Address Size Caller +0x08064c48 0x14 at 0x80481eb +0x08064c60 0x14 at 0x80481eb +0x08064c78 0x14 at 0x80481eb +0x08064c90 0x14 at 0x80481eb +@end example + +We have called @code{mtrace} with only one argument and so the script +has no chance to find out what is meant with the addresses given in the +trace. We can do better: + +@example +drepper$ mtrace tst errlog +- 0x08064cc8 Free 2 was never alloc'd /home/drepper/tst.c:39 +- 0x08064ce0 Free 3 was never alloc'd /home/drepper/tst.c:39 +- 0x08064cf8 Free 4 was never alloc'd /home/drepper/tst.c:39 + +Memory not freed: +----------------- + Address Size Caller +0x08064c48 0x14 at /home/drepper/tst.c:33 +0x08064c60 0x14 at /home/drepper/tst.c:33 +0x08064c78 0x14 at /home/drepper/tst.c:33 +0x08064c90 0x14 at /home/drepper/tst.c:33 +@end example + +Suddenly the output makes much more sense and the user can see +immediately where the function calls causing the trouble can be found. + +Interpreting this output is not complicated. There are at most two +different situations being detected. First, @code{free} was called for +pointers which were never returned by one of the allocation functions. +This is usually a very bad problem and what this looks like is shown in +the first three lines of the output. Situations like this are quite +rare and if they appear they show up very drastically: the program +normally crashes. + +The other situation, which is much harder to detect, is a memory leak.
As +you can see in the output the @code{mtrace} function collects all this +information and so can say that the program calls an allocation function +from line 33 in the source file @file{/home/drepper/tst.c} four +times without freeing this memory before the program terminates. +Whether this is a real problem remains to be investigated. + +@node Replacing malloc +@subsection Replacing @code{malloc} + +@cindex @code{malloc} replacement +@cindex @code{LD_PRELOAD} and @code{malloc} +@cindex alternative @code{malloc} implementations +@cindex customizing @code{malloc} +@cindex interposing @code{malloc} +@cindex preempting @code{malloc} +@cindex replacing @code{malloc} +@Theglibc{} supports replacing the built-in @code{malloc} implementation +with a different allocator with the same interface. For dynamically +linked programs, this happens through ELF symbol interposition, either +using shared object dependencies or @code{LD_PRELOAD}. For static +linking, the @code{malloc} replacement library must be linked in before +linking against @code{libc.a} (explicitly or implicitly). + +@strong{Note:} Failure to provide a complete set of replacement +functions (that is, all the functions used by the application, +@theglibc{}, and other linked-in libraries) can lead to static linking +failures, and, at run time, to heap corruption and application crashes. + +The minimum set of functions which has to be provided by a custom +@code{malloc} is given in the table below.
+ +@table @code +@item malloc +@item free +@item calloc +@item realloc +@end table + +These @code{malloc}-related functions are required for @theglibc{} to +work.@footnote{Versions of @theglibc{} before 2.25 required that a +custom @code{malloc} defines @code{__libc_memalign} (with the same +interface as the @code{memalign} function).} + +The @code{malloc} implementation in @theglibc{} provides additional +functionality not used by the library itself, but which is often used by +other system libraries and applications. A general-purpose replacement +@code{malloc} implementation should provide definitions of these +functions, too. Their names are listed in the following table. + +@table @code +@item aligned_alloc +@item malloc_usable_size +@item memalign +@item posix_memalign +@item pvalloc +@item valloc +@end table + +In addition, very old applications may use the obsolete @code{cfree} +function. + +Further @code{malloc}-related functions such as @code{mallopt} or +@code{mallinfo} will not have any effect or return incorrect statistics +when a replacement @code{malloc} is in use. However, failure to replace +these functions typically does not result in crashes or other incorrect +application behavior, but may result in static linking failures. + +@node Obstacks +@subsection Obstacks +@cindex obstacks + +An @dfn{obstack} is a pool of memory containing a stack of objects. You +can create any number of separate obstacks, and then allocate objects in +specified obstacks. Within each obstack, the last object allocated must +always be the first one freed, but distinct obstacks are independent of +each other. + +Aside from this one constraint of order of freeing, obstacks are totally +general: an obstack can contain any number of objects of any size. They +are implemented with macros, so allocation is usually very fast as long as +the objects are usually small. And the only space overhead per object is +the padding needed to start each object on a suitable boundary. 
+ +@menu +* Creating Obstacks:: How to declare an obstack in your program. +* Preparing for Obstacks:: Preparations needed before you can + use obstacks. +* Allocation in an Obstack:: Allocating objects in an obstack. +* Freeing Obstack Objects:: Freeing objects in an obstack. +* Obstack Functions:: The obstack functions are both + functions and macros. +* Growing Objects:: Making an object bigger by stages. +* Extra Fast Growing:: Extra-high-efficiency (though more + complicated) growing objects. +* Status of an Obstack:: Inquiries about the status of an obstack. +* Obstacks Data Alignment:: Controlling alignment of objects in obstacks. +* Obstack Chunks:: How obstacks obtain and release chunks; + efficiency considerations. +* Summary of Obstacks:: +@end menu + +@node Creating Obstacks +@subsubsection Creating Obstacks + +The utilities for manipulating obstacks are declared in the header +file @file{obstack.h}. +@pindex obstack.h + +@comment obstack.h +@comment GNU +@deftp {Data Type} {struct obstack} +An obstack is represented by a data structure of type @code{struct +obstack}. This structure has a small fixed size; it records the status +of the obstack and how to find the space in which objects are allocated. +It does not contain any of the objects themselves. You should not try +to access the contents of the structure directly; use only the functions +described in this chapter. +@end deftp + +You can declare variables of type @code{struct obstack} and use them as +obstacks, or you can allocate obstacks dynamically like any other kind +of object. Dynamic allocation of obstacks allows your program to have a +variable number of different stacks. (You can even allocate an +obstack structure in another obstack, but this is rarely useful.) + +All the functions that work with obstacks require you to specify which +obstack to use. You do this with a pointer of type @code{struct obstack +*}. 
In the following, we often say ``an obstack'' when strictly +speaking the object at hand is such a pointer. + +The objects in the obstack are packed into large blocks called +@dfn{chunks}. The @code{struct obstack} structure points to a chain of +the chunks currently in use. + +The obstack library obtains a new chunk whenever you allocate an object +that won't fit in the previous chunk. Since the obstack library manages +chunks automatically, you don't need to pay much attention to them, but +you do need to supply a function which the obstack library should use to +get a chunk. Usually you supply a function which uses @code{malloc} +directly or indirectly. You must also supply a function to free a chunk. +These matters are described in the following section. + +@node Preparing for Obstacks +@subsubsection Preparing for Using Obstacks + +Each source file in which you plan to use the obstack functions +must include the header file @file{obstack.h}, like this: + +@smallexample +#include <obstack.h> +@end smallexample + +@findex obstack_chunk_alloc +@findex obstack_chunk_free +Also, if the source file uses the macro @code{obstack_init}, it must +declare or define two functions or macros that will be called by the +obstack library. One, @code{obstack_chunk_alloc}, is used to allocate +the chunks of memory into which objects are packed. The other, +@code{obstack_chunk_free}, is used to return chunks when the objects in +them are freed. These macros should appear before any use of obstacks +in the source file. + +Usually these are defined to use @code{malloc} via the intermediary +@code{xmalloc} (@pxref{Unconstrained Allocation}). 
This is done with +the following pair of macro definitions: + +@smallexample +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free +@end smallexample + +@noindent +Though the memory you get using obstacks really comes from @code{malloc}, +using obstacks is faster because @code{malloc} is called less often, for +larger blocks of memory. @xref{Obstack Chunks}, for full details. + +At run time, before the program can use a @code{struct obstack} object +as an obstack, it must initialize the obstack by calling +@code{obstack_init}. + +@comment obstack.h +@comment GNU +@deftypefun int obstack_init (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{@acsmem{}}} +@c obstack_init @mtsrace:obstack-ptr @acsmem +@c _obstack_begin @acsmem +@c chunkfun = obstack_chunk_alloc (suggested malloc) +@c freefun = obstack_chunk_free (suggested free) +@c *chunkfun @acsmem +@c obstack_chunk_alloc user-supplied +@c *obstack_alloc_failed_handler user-supplied +@c -> print_and_abort (default) +@c +@c print_and_abort +@c _ dup @ascuintl +@c fxprintf dup @asucorrupt @aculock @acucorrupt +@c exit @acucorrupt? +Initialize obstack @var{obstack-ptr} for allocation of objects. This +function calls the obstack's @code{obstack_chunk_alloc} function. If +allocation of memory fails, the function pointed to by +@code{obstack_alloc_failed_handler} is called. The @code{obstack_init} +function always returns 1 (Compatibility notice: Former versions of +obstack returned 0 if allocation failed). +@end deftypefun + +Here are two examples of how to allocate the space for an obstack and +initialize it. 
First, an obstack that is a static variable: + +@smallexample +static struct obstack myobstack; +@dots{} +obstack_init (&myobstack); +@end smallexample + +@noindent +Second, an obstack that is itself dynamically allocated: + +@smallexample +struct obstack *myobstack_ptr + = (struct obstack *) xmalloc (sizeof (struct obstack)); + +obstack_init (myobstack_ptr); +@end smallexample + +@comment obstack.h +@comment GNU +@defvar obstack_alloc_failed_handler +The value of this variable is a pointer to a function that +@code{obstack} uses when @code{obstack_chunk_alloc} fails to allocate +memory. The default action is to print a message and abort. +You should supply a function that either calls @code{exit} +(@pxref{Program Termination}) or @code{longjmp} (@pxref{Non-Local +Exits}) and doesn't return. + +@smallexample +void my_obstack_alloc_failed (void) +@dots{} +obstack_alloc_failed_handler = &my_obstack_alloc_failed; +@end smallexample + +@end defvar + +@node Allocation in an Obstack +@subsubsection Allocation in an Obstack +@cindex allocation (obstacks) + +The most direct way to allocate an object in an obstack is with +@code{obstack_alloc}, which is invoked almost like @code{malloc}. + +@comment obstack.h +@comment GNU +@deftypefun {void *} obstack_alloc (struct obstack *@var{obstack-ptr}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_alloc @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_blank dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_finish dup @mtsrace:obstack-ptr @acucorrupt +This allocates an uninitialized block of @var{size} bytes in an obstack +and returns its address. Here @var{obstack-ptr} specifies which obstack +to allocate the block in; it is the address of the @code{struct obstack} +object which represents the obstack. Each obstack function or macro +requires you to specify an @var{obstack-ptr} as the first argument. 
+ +This function calls the obstack's @code{obstack_chunk_alloc} function if +it needs to allocate a new chunk of memory; it calls +@code{obstack_alloc_failed_handler} if allocation of memory by +@code{obstack_chunk_alloc} failed. +@end deftypefun + +For example, here is a function that allocates a copy of a string @var{string} +in a specific obstack, which is in the variable @code{string_obstack}: + +@smallexample +struct obstack string_obstack; + +char * +copystring (char *string) +@{ + size_t len = strlen (string) + 1; + char *s = (char *) obstack_alloc (&string_obstack, len); + memcpy (s, string, len); + return s; +@} +@end smallexample + +To allocate a block with specified contents, use the function +@code{obstack_copy}, declared like this: + +@comment obstack.h +@comment GNU +@deftypefun {void *} obstack_copy (struct obstack *@var{obstack-ptr}, void *@var{address}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_copy @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_grow dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_finish dup @mtsrace:obstack-ptr @acucorrupt +This allocates a block and initializes it by copying @var{size} +bytes of data starting at @var{address}. It calls +@code{obstack_alloc_failed_handler} if allocation of memory by +@code{obstack_chunk_alloc} failed. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun {void *} obstack_copy0 (struct obstack *@var{obstack-ptr}, void *@var{address}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_copy0 @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_grow0 dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_finish dup @mtsrace:obstack-ptr @acucorrupt +Like @code{obstack_copy}, but appends an extra byte containing a null +character. This extra byte is not counted in the argument @var{size}. 
+@end deftypefun + +The @code{obstack_copy0} function is convenient for copying a sequence +of characters into an obstack as a null-terminated string. Here is an +example of its use: + +@smallexample +char * +obstack_savestring (char *addr, int size) +@{ + return obstack_copy0 (&myobstack, addr, size); +@} +@end smallexample + +@noindent +Contrast this with the previous example of @code{savestring} using +@code{malloc} (@pxref{Basic Allocation}). + +@node Freeing Obstack Objects +@subsubsection Freeing Objects in an Obstack +@cindex freeing (obstacks) + +To free an object allocated in an obstack, use the function +@code{obstack_free}. Since the obstack is a stack of objects, freeing +one object automatically frees all other objects allocated more recently +in the same obstack. + +@comment obstack.h +@comment GNU +@deftypefun void obstack_free (struct obstack *@var{obstack-ptr}, void *@var{object}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{}}} +@c obstack_free @mtsrace:obstack-ptr @acucorrupt +@c (obstack_free) @mtsrace:obstack-ptr @acucorrupt +@c *freefun dup user-supplied +If @var{object} is a null pointer, everything allocated in the obstack +is freed. Otherwise, @var{object} must be the address of an object +allocated in the obstack. Then @var{object} is freed, along with +everything allocated in @var{obstack-ptr} since @var{object}. +@end deftypefun + +Note that if @var{object} is a null pointer, the result is an +uninitialized obstack. To free all memory in an obstack but leave it +valid for further allocation, call @code{obstack_free} with the address +of the first object allocated on the obstack: + +@smallexample +obstack_free (obstack_ptr, first_object_allocated_ptr); +@end smallexample + +Recall that the objects in an obstack are grouped into chunks. When all +the objects in a chunk become free, the obstack library automatically +frees the chunk (@pxref{Preparing for Obstacks}). 
Then other +obstacks, or non-obstack allocation, can reuse the space of the chunk. + +@node Obstack Functions +@subsubsection Obstack Functions and Macros +@cindex macros + +The interfaces for using obstacks may be defined either as functions or +as macros, depending on the compiler. The obstack facility works with +all C compilers, including both @w{ISO C} and traditional C, but there are +precautions you must take if you plan to use compilers other than GNU C. + +If you are using an old-fashioned @w{non-ISO C} compiler, all the obstack +``functions'' are actually defined only as macros. You can call these +macros like functions, but you cannot use them in any other way (for +example, you cannot take their address). + +Calling the macros requires a special precaution: namely, the first +operand (the obstack pointer) may not contain any side effects, because +it may be computed more than once. For example, if you write this: + +@smallexample +obstack_alloc (get_obstack (), 4); +@end smallexample + +@noindent +you will find that @code{get_obstack} may be called several times. +If you use @code{*obstack_list_ptr++} as the obstack pointer argument, +you will get very strange results since the incrementation may occur +several times. + +In @w{ISO C}, each function has both a macro definition and a function +definition. The function definition is used if you take the address of the +function without calling it. An ordinary call uses the macro definition by +default, but you can request the function definition instead by writing the +function name in parentheses, as shown here: + +@smallexample +char *x; +void *(*funcp) (); +/* @r{Use the macro}. */ +x = (char *) obstack_alloc (obptr, size); +/* @r{Call the function}. */ +x = (char *) (obstack_alloc) (obptr, size); +/* @r{Take the address of the function}. */ +funcp = obstack_alloc; +@end smallexample + +@noindent +This is the same situation that exists in @w{ISO C} for the standard library +functions. 
@xref{Macro Definitions}. + +@strong{Warning:} When you do use the macros, you must observe the +precaution of avoiding side effects in the first operand, even in @w{ISO C}. + +If you use the GNU C compiler, this precaution is not necessary, because +various language extensions in GNU C permit defining the macros so as to +compute each argument only once. + +@node Growing Objects +@subsubsection Growing Objects +@cindex growing objects (in obstacks) +@cindex changing the size of a block (obstacks) + +Because memory in obstack chunks is used sequentially, it is possible to +build up an object step by step, adding one or more bytes at a time to the +end of the object. With this technique, you do not need to know how much +data you will put in the object until you come to the end of it. We call +this the technique of @dfn{growing objects}. The special functions +for adding data to the growing object are described in this section. + +You don't need to do anything special when you start to grow an object. +Using one of the functions to add data to the object automatically +starts it. However, it is necessary to say explicitly when the object is +finished. This is done with the function @code{obstack_finish}. + +The actual address of the object thus built up is not known until the +object is finished. Until then, it always remains possible that you will +add so much data that the object must be copied into a new chunk. + +While the obstack is in use for a growing object, you cannot use it for +ordinary allocation of another object. If you try to do so, the space +already added to the growing object will become part of the other object. 
+ +@comment obstack.h +@comment GNU +@deftypefun void obstack_blank (struct obstack *@var{obstack-ptr}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_blank @mtsrace:obstack-ptr @acucorrupt @acsmem +@c _obstack_newchunk @mtsrace:obstack-ptr @acucorrupt @acsmem +@c *chunkfun dup @acsmem +@c *obstack_alloc_failed_handler dup user-supplied +@c *freefun +@c obstack_blank_fast dup @mtsrace:obstack-ptr +The most basic function for adding to a growing object is +@code{obstack_blank}, which adds space without initializing it. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_grow (struct obstack *@var{obstack-ptr}, void *@var{data}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_grow @mtsrace:obstack-ptr @acucorrupt @acsmem +@c _obstack_newchunk dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c memcpy ok +To add a block of initialized space, use @code{obstack_grow}, which is +the growing-object analogue of @code{obstack_copy}. It adds @var{size} +bytes of data to the growing object, copying the contents from +@var{data}. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_grow0 (struct obstack *@var{obstack-ptr}, void *@var{data}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_grow0 @mtsrace:obstack-ptr @acucorrupt @acsmem +@c (no sequence point between storing NUL and incrementing next_free) +@c (multiple changes to next_free => @acucorrupt) +@c _obstack_newchunk dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c memcpy ok +This is the growing-object analogue of @code{obstack_copy0}. It adds +@var{size} bytes copied from @var{data}, followed by an additional null +character. 
+@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_1grow (struct obstack *@var{obstack-ptr}, char @var{c}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_1grow @mtsrace:obstack-ptr @acucorrupt @acsmem +@c _obstack_newchunk dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_1grow_fast dup @mtsrace:obstack-ptr @acucorrupt @acsmem +To add one character at a time, use the function @code{obstack_1grow}. +It adds a single byte containing @var{c} to the growing object. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_ptr_grow (struct obstack *@var{obstack-ptr}, void *@var{data}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_ptr_grow @mtsrace:obstack-ptr @acucorrupt @acsmem +@c _obstack_newchunk dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_ptr_grow_fast dup @mtsrace:obstack-ptr +To add the value of a pointer, use the function +@code{obstack_ptr_grow}. It adds @code{sizeof (void *)} bytes +containing the value of @var{data}. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_int_grow (struct obstack *@var{obstack-ptr}, int @var{data}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_int_grow @mtsrace:obstack-ptr @acucorrupt @acsmem +@c _obstack_newchunk dup @mtsrace:obstack-ptr @acucorrupt @acsmem +@c obstack_int_grow_fast dup @mtsrace:obstack-ptr +A single value of type @code{int} can be added by using the +@code{obstack_int_grow} function. It adds @code{sizeof (int)} bytes to +the growing object and initializes them with the value of @var{data}. 
+@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun {void *} obstack_finish (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{}}} +@c obstack_finish @mtsrace:obstack-ptr @acucorrupt +When you are finished growing the object, use the function +@code{obstack_finish} to close it off and return its final address. + +Once you have finished the object, the obstack is available for ordinary +allocation or for growing another object. + +This function can return a null pointer under the same conditions as +@code{obstack_alloc} (@pxref{Allocation in an Obstack}). +@end deftypefun + +When you build an object by growing it, you will probably need to know +afterward how long it became. You need not keep track of this as you grow +the object, because you can find out the length from the obstack just +before finishing the object with the function @code{obstack_object_size}, +declared as follows: + +@comment obstack.h +@comment GNU +@deftypefun int obstack_object_size (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{}} +This function returns the current size of the growing object, in bytes. +Remember to call this function @emph{before} finishing the object. +After it is finished, @code{obstack_object_size} will return zero. +@end deftypefun + +If you have started growing an object and wish to cancel it, you should +finish it and then free it, like this: + +@smallexample +obstack_free (obstack_ptr, obstack_finish (obstack_ptr)); +@end smallexample + +@noindent +This has no effect if no object was growing. + +@cindex shrinking objects +You can use @code{obstack_blank} with a negative size argument to make +the current object smaller. Just don't try to shrink it beyond zero +length---there's no telling what will happen if you do that. 
+ +@node Extra Fast Growing +@subsubsection Extra Fast Growing Objects +@cindex efficiency and obstacks + +The usual functions for growing objects incur overhead for checking +whether there is room for the new growth in the current chunk. If you +are frequently constructing objects in small steps of growth, this +overhead can be significant. + +You can reduce the overhead by using special ``fast growth'' +functions that grow the object without checking. In order to have a +robust program, you must do the checking yourself. If you do this checking +in the simplest way each time you are about to add data to the object, you +have not saved anything, because that is what the ordinary growth +functions do. But if you can arrange to check less often, or check +more efficiently, then you make the program faster. + +The function @code{obstack_room} returns the amount of room available +in the current chunk. It is declared as follows: + +@comment obstack.h +@comment GNU +@deftypefun int obstack_room (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{}} +This returns the number of bytes that can be added safely to the current +growing object (or to an object about to be started) in obstack +@var{obstack-ptr} using the fast growth functions. +@end deftypefun + +While you know there is room, you can use these fast growth functions +for adding data to a growing object: + +@comment obstack.h +@comment GNU +@deftypefun void obstack_1grow_fast (struct obstack *@var{obstack-ptr}, char @var{c}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acunsafe{@acucorrupt{} @acsmem{}}} +@c obstack_1grow_fast @mtsrace:obstack-ptr @acucorrupt @acsmem +@c (no sequence point between copying c and incrementing next_free) +The function @code{obstack_1grow_fast} adds one byte containing the +character @var{c} to the growing object in obstack @var{obstack-ptr}. 
+@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_ptr_grow_fast (struct obstack *@var{obstack-ptr}, void *@var{data}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{}} +@c obstack_ptr_grow_fast @mtsrace:obstack-ptr +The function @code{obstack_ptr_grow_fast} adds @code{sizeof (void *)} +bytes containing the value of @var{data} to the growing object in +obstack @var{obstack-ptr}. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_int_grow_fast (struct obstack *@var{obstack-ptr}, int @var{data}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{}} +@c obstack_int_grow_fast @mtsrace:obstack-ptr +The function @code{obstack_int_grow_fast} adds @code{sizeof (int)} bytes +containing the value of @var{data} to the growing object in obstack +@var{obstack-ptr}. +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun void obstack_blank_fast (struct obstack *@var{obstack-ptr}, int @var{size}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{}} +@c obstack_blank_fast @mtsrace:obstack-ptr +The function @code{obstack_blank_fast} adds @var{size} bytes to the +growing object in obstack @var{obstack-ptr} without initializing them. +@end deftypefun + +When you check for space using @code{obstack_room} and there is not +enough room for what you want to add, the fast growth functions +are not safe. In this case, simply use the corresponding ordinary +growth function instead. Very soon this will copy the object to a +new chunk; then there will be lots of room available again. + +So, each time you use an ordinary growth function, check afterward for +sufficient space using @code{obstack_room}. Once the object is copied +to a new chunk, there will be plenty of space again, so the program will +start using the fast growth functions again. 
+ +Here is an example: + +@smallexample +@group +void +add_string (struct obstack *obstack, const char *ptr, int len) +@{ + while (len > 0) + @{ + int room = obstack_room (obstack); + if (room == 0) + @{ + /* @r{Not enough room. Add one character slowly,} + @r{which may copy to a new chunk and make room.} */ + obstack_1grow (obstack, *ptr++); + len--; + @} + else + @{ + if (room > len) + room = len; + /* @r{Add fast as much as we have room for.} */ + len -= room; + while (room-- > 0) + obstack_1grow_fast (obstack, *ptr++); + @} + @} +@} +@end group +@end smallexample + +@node Status of an Obstack +@subsubsection Status of an Obstack +@cindex obstack status +@cindex status of obstack + +Here are functions that provide information on the current status of +allocation in an obstack. You can use them to learn about an object while +still growing it. + +@comment obstack.h +@comment GNU +@deftypefun {void *} obstack_base (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acsafe{}} +This function returns the tentative address of the beginning of the +currently growing object in @var{obstack-ptr}. If you finish the object +immediately, it will have that address. If you make it larger first, it +may outgrow the current chunk---then its address will change! + +If no object is growing, this value says where the next object you +allocate will start (once again assuming it fits in the current +chunk). +@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun {void *} obstack_next_free (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acsafe{}} +This function returns the address of the first free byte in the current +chunk of obstack @var{obstack-ptr}. This is the end of the currently +growing object. If no object is growing, @code{obstack_next_free} +returns the same value as @code{obstack_base}. 
+@end deftypefun + +@comment obstack.h +@comment GNU +@deftypefun int obstack_object_size (struct obstack *@var{obstack-ptr}) +@c dup +@safety{@prelim{}@mtsafe{@mtsrace{:obstack-ptr}}@assafe{}@acsafe{}} +This function returns the size in bytes of the currently growing object. +This is equivalent to + +@smallexample +obstack_next_free (@var{obstack-ptr}) - obstack_base (@var{obstack-ptr}) +@end smallexample +@end deftypefun + +@node Obstacks Data Alignment +@subsubsection Alignment of Data in Obstacks +@cindex alignment (in obstacks) + +Each obstack has an @dfn{alignment boundary}; each object allocated in +the obstack automatically starts on an address that is a multiple of the +specified boundary. By default, this boundary is aligned so that +the object can hold any type of data. + +To access an obstack's alignment boundary, use the macro +@code{obstack_alignment_mask}, whose function prototype looks like +this: + +@comment obstack.h +@comment GNU +@deftypefn Macro int obstack_alignment_mask (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The value is a bit mask; a bit that is 1 indicates that the corresponding +bit in the address of an object should be 0. The mask value should be one +less than a power of 2; the effect is that all object addresses are +multiples of that power of 2. The default value of the mask is a value +that allows aligned objects to hold any type of data: for example, if +its value is 3, any type of data can be stored at locations whose +addresses are multiples of 4. A mask value of 0 means an object can start +on any multiple of 1 (that is, no alignment is required). + +The expansion of the macro @code{obstack_alignment_mask} is an lvalue, +so you can alter the mask by assignment. For example, this statement: + +@smallexample +obstack_alignment_mask (obstack_ptr) = 0; +@end smallexample + +@noindent +has the effect of turning off alignment processing in the specified obstack. 
+@end deftypefn + +Note that a change in alignment mask does not take effect until +@emph{after} the next time an object is allocated or finished in the +obstack. If you are not growing an object, you can make the new +alignment mask take effect immediately by calling @code{obstack_finish}. +This will finish a zero-length object and then do proper alignment for +the next object. + +@node Obstack Chunks +@subsubsection Obstack Chunks +@cindex efficiency of chunks +@cindex chunks + +Obstacks work by allocating space for themselves in large chunks, and +then parceling out space in the chunks to satisfy your requests. Chunks +are normally 4096 bytes long unless you specify a different chunk size. +The chunk size includes 8 bytes of overhead that are not actually used +for storing objects. Regardless of the specified size, longer chunks +will be allocated when necessary for long objects. + +The obstack library allocates chunks by calling the function +@code{obstack_chunk_alloc}, which you must define. When a chunk is no +longer needed because you have freed all the objects in it, the obstack +library frees the chunk by calling @code{obstack_chunk_free}, which you +must also define. + +These two must be defined (as macros) or declared (as functions) in each +source file that uses @code{obstack_init} (@pxref{Creating Obstacks}). +Most often they are defined as macros like this: + +@smallexample +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free +@end smallexample + +Note that these are simple macros (no arguments). Macro definitions with +arguments will not work! It is necessary that @code{obstack_chunk_alloc} +or @code{obstack_chunk_free}, alone, expand into a function name if it is +not itself a function name. + +If you allocate chunks with @code{malloc}, the chunk size should be a +power of 2. 
The default chunk size, 4096, was chosen because it is long +enough to satisfy many typical requests on the obstack yet short enough +not to waste too much memory in the portion of the last chunk not yet used. + +@comment obstack.h +@comment GNU +@deftypefn Macro int obstack_chunk_size (struct obstack *@var{obstack-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This returns the chunk size of the given obstack. +@end deftypefn + +Since this macro expands to an lvalue, you can specify a new chunk size by +assigning it a new value. Doing so does not affect the chunks already +allocated, but will change the size of chunks allocated for that particular +obstack in the future. It is unlikely to be useful to make the chunk size +smaller, but making it larger might improve efficiency if you are +allocating many objects whose size is comparable to the chunk size. Here +is how to do so cleanly: + +@smallexample +if (obstack_chunk_size (obstack_ptr) < @var{new-chunk-size}) + obstack_chunk_size (obstack_ptr) = @var{new-chunk-size}; +@end smallexample + +@node Summary of Obstacks +@subsubsection Summary of Obstack Functions + +Here is a summary of all the functions associated with obstacks. Each +takes the address of an obstack (@code{struct obstack *}) as its first +argument. + +@table @code +@item int obstack_init (struct obstack *@var{obstack-ptr}) +Initialize use of an obstack. @xref{Creating Obstacks}. + +@item void *obstack_alloc (struct obstack *@var{obstack-ptr}, int @var{size}) +Allocate an object of @var{size} uninitialized bytes. +@xref{Allocation in an Obstack}. + +@item void *obstack_copy (struct obstack *@var{obstack-ptr}, void *@var{address}, int @var{size}) +Allocate an object of @var{size} bytes, with contents copied from +@var{address}. @xref{Allocation in an Obstack}. 
+ +@item void *obstack_copy0 (struct obstack *@var{obstack-ptr}, void *@var{address}, int @var{size}) +Allocate an object of @var{size}+1 bytes, with @var{size} of them copied +from @var{address}, followed by a null character at the end. +@xref{Allocation in an Obstack}. + +@item void obstack_free (struct obstack *@var{obstack-ptr}, void *@var{object}) +Free @var{object} (and everything allocated in the specified obstack +more recently than @var{object}). @xref{Freeing Obstack Objects}. + +@item void obstack_blank (struct obstack *@var{obstack-ptr}, int @var{size}) +Add @var{size} uninitialized bytes to a growing object. +@xref{Growing Objects}. + +@item void obstack_grow (struct obstack *@var{obstack-ptr}, void *@var{address}, int @var{size}) +Add @var{size} bytes, copied from @var{address}, to a growing object. +@xref{Growing Objects}. + +@item void obstack_grow0 (struct obstack *@var{obstack-ptr}, void *@var{address}, int @var{size}) +Add @var{size} bytes, copied from @var{address}, to a growing object, +and then add another byte containing a null character. @xref{Growing +Objects}. + +@item void obstack_1grow (struct obstack *@var{obstack-ptr}, char @var{data-char}) +Add one byte containing @var{data-char} to a growing object. +@xref{Growing Objects}. + +@item void *obstack_finish (struct obstack *@var{obstack-ptr}) +Finalize the object that is growing and return its permanent address. +@xref{Growing Objects}. + +@item int obstack_object_size (struct obstack *@var{obstack-ptr}) +Get the current size of the currently growing object. @xref{Growing +Objects}. + +@item void obstack_blank_fast (struct obstack *@var{obstack-ptr}, int @var{size}) +Add @var{size} uninitialized bytes to a growing object without checking +that there is enough room. @xref{Extra Fast Growing}. + +@item void obstack_1grow_fast (struct obstack *@var{obstack-ptr}, char @var{data-char}) +Add one byte containing @var{data-char} to a growing object without +checking that there is enough room. 
@xref{Extra Fast Growing}. + +@item int obstack_room (struct obstack *@var{obstack-ptr}) +Get the amount of room now available for growing the current object. +@xref{Extra Fast Growing}. + +@item int obstack_alignment_mask (struct obstack *@var{obstack-ptr}) +The mask used for aligning the beginning of an object. This is an +lvalue. @xref{Obstacks Data Alignment}. + +@item int obstack_chunk_size (struct obstack *@var{obstack-ptr}) +The size for allocating chunks. This is an lvalue. @xref{Obstack Chunks}. + +@item void *obstack_base (struct obstack *@var{obstack-ptr}) +Tentative starting address of the currently growing object. +@xref{Status of an Obstack}. + +@item void *obstack_next_free (struct obstack *@var{obstack-ptr}) +Address just after the end of the currently growing object. +@xref{Status of an Obstack}. +@end table + +@node Variable Size Automatic +@subsection Automatic Storage with Variable Size +@cindex automatic freeing +@cindex @code{alloca} function +@cindex automatic storage with variable size + +The function @code{alloca} supports a kind of half-dynamic allocation in +which blocks are allocated dynamically but freed automatically. + +Allocating a block with @code{alloca} is an explicit action; you can +allocate as many blocks as you wish, and compute the size at run time. But +all the blocks are freed when you exit the function that @code{alloca} was +called from, just as if they were automatic variables declared in that +function. There is no way to free the space explicitly. + +The prototype for @code{alloca} is in @file{stdlib.h}. This function is +a BSD extension. +@pindex stdlib.h + +@comment stdlib.h +@comment GNU, BSD +@deftypefun {void *} alloca (size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The return value of @code{alloca} is the address of a block of @var{size} +bytes of memory, allocated in the stack frame of the calling function. 
+@end deftypefun + +Do not use @code{alloca} inside the arguments of a function call---you +will get unpredictable results, because the stack space for the +@code{alloca} would appear on the stack in the middle of the space for +the function arguments. An example of what to avoid is @code{foo (x, +alloca (4), y)}. +@c This might get fixed in future versions of GCC, but that won't make +@c it safe with compilers generally. + +@menu +* Alloca Example:: Example of using @code{alloca}. +* Advantages of Alloca:: Reasons to use @code{alloca}. +* Disadvantages of Alloca:: Reasons to avoid @code{alloca}. +* GNU C Variable-Size Arrays:: Only in GNU C, here is an alternative + method of allocating dynamically and + freeing automatically. +@end menu + +@node Alloca Example +@subsubsection @code{alloca} Example + +As an example of the use of @code{alloca}, here is a function that opens +a file name made from concatenating two argument strings, and returns a +file descriptor or minus one signifying failure: + +@smallexample +int +open2 (char *str1, char *str2, int flags, int mode) +@{ + char *name = (char *) alloca (strlen (str1) + strlen (str2) + 1); + stpcpy (stpcpy (name, str1), str2); + return open (name, flags, mode); +@} +@end smallexample + +@noindent +Here is how you would get the same results with @code{malloc} and +@code{free}: + +@smallexample +int +open2 (char *str1, char *str2, int flags, int mode) +@{ + char *name = (char *) malloc (strlen (str1) + strlen (str2) + 1); + int desc; + if (name == 0) + fatal ("virtual memory exceeded"); + stpcpy (stpcpy (name, str1), str2); + desc = open (name, flags, mode); + free (name); + return desc; +@} +@end smallexample + +As you can see, it is simpler with @code{alloca}. But @code{alloca} has +other, more important advantages, and some disadvantages. 
+ +@node Advantages of Alloca +@subsubsection Advantages of @code{alloca} + +Here are the reasons why @code{alloca} may be preferable to @code{malloc}: + +@itemize @bullet +@item +Using @code{alloca} wastes very little space and is very fast. (It is +open-coded by the GNU C compiler.) + +@item +Since @code{alloca} does not have separate pools for different sizes of +blocks, space used for any size block can be reused for any other size. +@code{alloca} does not cause memory fragmentation. + +@item +@cindex longjmp +Nonlocal exits done with @code{longjmp} (@pxref{Non-Local Exits}) +automatically free the space allocated with @code{alloca} when they exit +through the function that called @code{alloca}. This is the most +important reason to use @code{alloca}. + +To illustrate this, suppose you have a function +@code{open_or_report_error} which returns a descriptor, like +@code{open}, if it succeeds, but does not return to its caller if it +fails. If the file cannot be opened, it prints an error message and +jumps out to the command level of your program using @code{longjmp}. +Let's change @code{open2} (@pxref{Alloca Example}) to use this +subroutine:@refill + +@smallexample +int +open2 (char *str1, char *str2, int flags, int mode) +@{ + char *name = (char *) alloca (strlen (str1) + strlen (str2) + 1); + stpcpy (stpcpy (name, str1), str2); + return open_or_report_error (name, flags, mode); +@} +@end smallexample + +@noindent +Because of the way @code{alloca} works, the memory it allocates is +freed even when an error occurs, with no special effort required. + +By contrast, the previous definition of @code{open2} (which uses +@code{malloc} and @code{free}) would develop a memory leak if it were +changed in this way. Even if you are willing to make more changes to +fix it, there is no easy way to do so. 
+@end itemize + +@node Disadvantages of Alloca +@subsubsection Disadvantages of @code{alloca} + +@cindex @code{alloca} disadvantages +@cindex disadvantages of @code{alloca} +These are the disadvantages of @code{alloca} in comparison with +@code{malloc}: + +@itemize @bullet +@item +If you try to allocate more memory than the machine can provide, you +don't get a clean error message. Instead you get a fatal signal like +the one you would get from an infinite recursion; probably a +segmentation violation (@pxref{Program Error Signals}). + +@item +Some @nongnusystems{} fail to support @code{alloca}, so it is less +portable. However, a slower emulation of @code{alloca} written in C +is available for use on systems with this deficiency. +@end itemize + +@node GNU C Variable-Size Arrays +@subsubsection GNU C Variable-Size Arrays +@cindex variable-sized arrays + +In GNU C, you can replace most uses of @code{alloca} with an array of +variable size. Here is how @code{open2} would look then: + +@smallexample +int open2 (char *str1, char *str2, int flags, int mode) +@{ + char name[strlen (str1) + strlen (str2) + 1]; + stpcpy (stpcpy (name, str1), str2); + return open (name, flags, mode); +@} +@end smallexample + +But @code{alloca} is not always equivalent to a variable-sized array, for +several reasons: + +@itemize @bullet +@item +A variable size array's space is freed at the end of the scope of the +name of the array. The space allocated with @code{alloca} +remains until the end of the function. + +@item +It is possible to use @code{alloca} within a loop, allocating an +additional block on each iteration. This is impossible with +variable-sized arrays. +@end itemize + +@strong{NB:} If you mix use of @code{alloca} and variable-sized arrays +within one function, exiting a scope in which a variable-sized array was +declared frees all blocks allocated with @code{alloca} during the +execution of that scope. 
+ + +@node Resizing the Data Segment +@section Resizing the Data Segment + +The symbols in this section are declared in @file{unistd.h}. + +You will not normally use the functions in this section, because the +functions described in @ref{Memory Allocation} are easier to use. Those +are interfaces to a @glibcadj{} memory allocator that uses the +functions below itself. The functions below are simple interfaces to +system calls. + +@comment unistd.h +@comment BSD +@deftypefun int brk (void *@var{addr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{brk} sets the high end of the calling process' data segment to +@var{addr}. + +The address of the end of a segment is defined to be the address of the +last byte in the segment plus 1. + +The function has no effect if @var{addr} is lower than the low end of +the data segment. (This is considered success, by the way.) + +The function fails if it would cause the data segment to overlap another +segment or exceed the process' data storage limit (@pxref{Limits on +Resources}). + +The function is named for a common historical case where data storage +and the stack are in the same segment. Data storage allocation grows +upward from the bottom of the segment while the stack grows downward +toward it from the top of the segment and the curtain between them is +called the @dfn{break}. + +The return value is zero on success. On failure, the return value is +@code{-1} and @code{errno} is set accordingly. The following @code{errno} +values are specific to this function: + +@table @code +@item ENOMEM +The request would cause the data segment to overlap another segment or +exceed the process' data storage limit. +@end table + +@c The Brk system call in Linux (as opposed to the GNU C Library function) +@c is considerably different. It always returns the new end of the data +@c segment, whether it succeeds or fails. 
The GNU C library Brk determines +@c it's a failure if and only if the system call returns an address less +@c than the address requested. + +@end deftypefun + + +@comment unistd.h +@comment BSD +@deftypefun void *sbrk (ptrdiff_t @var{delta}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This function is the same as @code{brk} except that you specify the new +end of the data segment as an offset @var{delta} from the current end +and on success the return value is the address of the resulting end of +the data segment instead of zero. + +This means you can use @samp{sbrk(0)} to find out what the current end +of the data segment is. + +@end deftypefun + + + +@node Locking Pages +@section Locking Pages +@cindex locking pages +@cindex memory lock +@cindex paging + +You can tell the system to associate a particular virtual memory page +with a real page frame and keep it that way --- i.e., cause the page to +be paged in if it isn't already and mark it so it will never be paged +out and consequently will never cause a page fault. This is called +@dfn{locking} a page. + +The functions in this section lock and unlock the calling process' +pages. + +@menu +* Why Lock Pages:: Reasons to read this section. +* Locked Memory Details:: Everything you need to know about locked + memory +* Page Lock Functions:: Here's how to do it. +@end menu + +@node Why Lock Pages +@subsection Why Lock Pages + +Because page faults cause paged out pages to be paged in transparently, +a process rarely needs to be concerned about locking pages. However, +there are two reasons people sometimes are: + +@itemize @bullet + +@item +Speed. A page fault is transparent only insofar as the process is not +sensitive to how long it takes to do a simple memory access. Time-critical +processes, especially realtime processes, may not be able to wait or +may not be able to tolerate variance in execution speed.
+@cindex realtime processing +@cindex speed of execution + +A process that needs to lock pages for this reason probably also needs +priority among other processes for use of the CPU. @xref{Priority}. + +In some cases, the programmer knows better than the system's demand +paging allocator which pages should remain in real memory to optimize +system performance. In this case, locking pages can help. + +@item +Privacy. If you keep secrets in virtual memory and that virtual memory +gets paged out, that increases the chance that the secrets will get out. +If a password gets written out to disk swap space, for example, it might +still be there long after virtual and real memory have been wiped clean. + +@end itemize + +Be aware that when you lock a page, that's one fewer page frame that can +be used to back other virtual memory (by the same or other processes), +which can mean more page faults, which means the system runs more +slowly. In fact, if you lock enough memory, some programs may not be +able to run at all for lack of real memory. + +@node Locked Memory Details +@subsection Locked Memory Details + +A memory lock is associated with a virtual page, not a real frame. The +paging rule is: If a frame backs at least one locked page, don't page it +out. + +Memory locks do not stack. I.e., you can't lock a particular page twice +so that it has to be unlocked twice before it is truly unlocked. It is +either locked or it isn't. + +A memory lock persists until the process that owns the memory explicitly +unlocks it. (But process termination and exec cause the virtual memory +to cease to exist, which you might say means it isn't locked any more). + +Memory locks are not inherited by child processes. (But note that on a +modern Unix system, immediately after a fork, the parent's and the +child's virtual address space are backed by the same real page frames, +so the child enjoys the parent's locks). @xref{Creating a Process}. 
+ +Because of its ability to impact other processes, only the superuser can +lock a page. Any process can unlock its own page. + +The system sets limits on the amount of memory a process can have locked +and the amount of real memory it can have dedicated to it. @xref{Limits +on Resources}. + +In Linux, locked pages aren't as locked as you might think. +Two virtual pages that are not shared memory can nonetheless be backed +by the same real frame. The kernel does this in the name of efficiency +when it knows both virtual pages contain identical data, and does it +even if one or both of the virtual pages are locked. + +But when a process modifies one of those pages, the kernel must get it a +separate frame and fill it with the page's data. This is known as a +@dfn{copy-on-write page fault}. It takes a small amount of time and in +a pathological case, getting that frame may require I/O. +@cindex copy-on-write page fault +@cindex page fault, copy-on-write + +To make sure this doesn't happen to your program, don't just lock the +pages. Write to them as well, unless you know you won't write to them +ever. And to make sure you have pre-allocated frames for your stack, +enter a scope that declares a C automatic variable larger than the +maximum stack size you will need, set it to something, then return from +its scope. + +@node Page Lock Functions +@subsection Functions To Lock And Unlock Pages + +The symbols in this section are declared in @file{sys/mman.h}. These +functions are defined by POSIX.1b, but their availability depends on +your kernel. If your kernel doesn't allow these functions, they exist +but always fail. They @emph{are} available with a Linux kernel. + +@strong{Portability Note:} POSIX.1b requires that when the @code{mlock} +and @code{munlock} functions are available, the file @file{unistd.h} +define the macro @code{_POSIX_MEMLOCK_RANGE} and the file +@code{limits.h} define the macro @code{PAGESIZE} to be the size of a +memory page in bytes. 
It requires that when the @code{mlockall} and +@code{munlockall} functions are available, the @file{unistd.h} file +define the macro @code{_POSIX_MEMLOCK}. @Theglibc{} conforms to +this requirement. + +@comment sys/mman.h +@comment POSIX.1b +@deftypefun int mlock (const void *@var{addr}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{mlock} locks a range of the calling process' virtual pages. + +The range of memory starts at address @var{addr} and is @var{len} bytes +long. Actually, since you must lock whole pages, it is the range of +pages that include any part of the specified range. + +When the function returns successfully, each of those pages is backed by +(connected to) a real frame (is resident) and is marked to stay that +way. This means the function may cause page-ins and have to wait for +them. + +When the function fails, it does not affect the lock status of any +pages. + +The return value is zero if the function succeeds. Otherwise, it is +@code{-1} and @code{errno} is set accordingly. @code{errno} values +specific to this function are: + +@table @code +@item ENOMEM +@itemize @bullet +@item +At least some of the specified address range does not exist in the +calling process' virtual address space. +@item +The locking would cause the process to exceed its locked page limit. +@end itemize + +@item EPERM +The calling process is not superuser. + +@item EINVAL +@var{len} is not positive. + +@item ENOSYS +The kernel does not provide @code{mlock} capability. + +@end table + +You can lock @emph{all} a process' memory with @code{mlockall}. You +unlock memory with @code{munlock} or @code{munlockall}. + +To avoid all page faults in a C program, you have to use +@code{mlockall}, because some of the memory a program uses is hidden +from the C code, e.g. the stack and automatic variables, and you +wouldn't know what address to tell @code{mlock}. 
+ +@end deftypefun + +@comment sys/mman.h +@comment POSIX.1b +@deftypefun int munlock (const void *@var{addr}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{munlock} unlocks a range of the calling process' virtual pages. + +@code{munlock} is the inverse of @code{mlock} and functions completely +analogously to @code{mlock}, except that there is no @code{EPERM} +failure. + +@end deftypefun + +@comment sys/mman.h +@comment POSIX.1b +@deftypefun int mlockall (int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{mlockall} locks all the pages in a process' virtual memory address +space, and/or any that are added to it in the future. This includes the +pages of the code, data and stack segment, as well as shared libraries, +user space kernel data, shared memory, and memory mapped files. + +@var{flags} is a string of single bit flags represented by the following +macros. They tell @code{mlockall} which of its functions you want. All +other bits must be zero. + +@vtable @code + +@item MCL_CURRENT +Lock all pages which currently exist in the calling process' virtual +address space. + +@item MCL_FUTURE +Set a mode such that any pages added to the process' virtual address +space in the future will be locked from birth. This mode does not +affect future address spaces owned by the same process so exec, which +replaces a process' address space, wipes out @code{MCL_FUTURE}. +@xref{Executing a File}. + +@end vtable + +When the function returns successfully, and you specified +@code{MCL_CURRENT}, all of the process' pages are backed by (connected +to) real frames (they are resident) and are marked to stay that way. +This means the function may cause page-ins and have to wait for them. 
+ +When the process is in @code{MCL_FUTURE} mode because it successfully +executed this function and specified @code{MCL_FUTURE}, any system call +by the process that requires space be added to its virtual address space +fails with @code{errno} = @code{ENOMEM} if locking the additional space +would cause the process to exceed its locked page limit. In the case +that the address space addition that can't be accommodated is stack +expansion, the stack expansion fails and the kernel sends a +@code{SIGSEGV} signal to the process. + +When the function fails, it does not affect the lock status of any pages +or the future locking mode. + +The return value is zero if the function succeeds. Otherwise, it is +@code{-1} and @code{errno} is set accordingly. @code{errno} values +specific to this function are: + +@table @code +@item ENOMEM +@itemize @bullet +@item +At least some of the specified address range does not exist in the +calling process' virtual address space. +@item +The locking would cause the process to exceed its locked page limit. +@end itemize + +@item EPERM +The calling process is not superuser. + +@item EINVAL +Undefined bits in @var{flags} are not zero. + +@item ENOSYS +The kernel does not provide @code{mlockall} capability. + +@end table + +You can lock just specific pages with @code{mlock}. You unlock pages +with @code{munlockall} and @code{munlock}. + +@end deftypefun + + +@comment sys/mman.h +@comment POSIX.1b +@deftypefun int munlockall (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{munlockall} unlocks every page in the calling process' virtual +address space and turns off @code{MCL_FUTURE} future locking mode. + +The return value is zero if the function succeeds. Otherwise, it is +@code{-1} and @code{errno} is set accordingly. The only way this +function can fail is for generic reasons that all functions and system +calls can fail, so there are no specific @code{errno} values.
+ +@end deftypefun + + + + +@ignore +@c This was never actually implemented. -zw +@node Relocating Allocator +@section Relocating Allocator + +@cindex relocating memory allocator +Any system of dynamic memory allocation has overhead: the amount of +space it uses is more than the amount the program asks for. The +@dfn{relocating memory allocator} achieves very low overhead by moving +blocks in memory as necessary, on its own initiative. + +@c @menu +@c * Relocator Concepts:: How to understand relocating allocation. +@c * Using Relocator:: Functions for relocating allocation. +@c @end menu + +@node Relocator Concepts +@subsection Concepts of Relocating Allocation + +@ifinfo +The @dfn{relocating memory allocator} achieves very low overhead by +moving blocks in memory as necessary, on its own initiative. +@end ifinfo + +When you allocate a block with @code{malloc}, the address of the block +never changes unless you use @code{realloc} to change its size. Thus, +you can safely store the address in various places, temporarily or +permanently, as you like. This is not safe when you use the relocating +memory allocator, because any and all relocatable blocks can move +whenever you allocate memory in any fashion. Even calling @code{malloc} +or @code{realloc} can move the relocatable blocks. + +@cindex handle +For each relocatable block, you must make a @dfn{handle}---a pointer +object in memory, designated to store the address of that block. The +relocating allocator knows where each block's handle is, and updates the +address stored there whenever it moves the block, so that the handle +always points to the block. Each time you access the contents of the +block, you should fetch its address anew from the handle. + +To call any of the relocating allocator functions from a signal handler +is almost certainly incorrect, because the signal could happen at any +time and relocate all the blocks. 
The only way to make this safe is to +block the signal around any access to the contents of any relocatable +block---not a convenient mode of operation. @xref{Nonreentrancy}. + +@node Using Relocator +@subsection Allocating and Freeing Relocatable Blocks + +@pindex malloc.h +In the descriptions below, @var{handleptr} designates the address of the +handle. All the functions are declared in @file{malloc.h}; all are GNU +extensions. + +@comment malloc.h +@comment GNU +@c @deftypefun {void *} r_alloc (void **@var{handleptr}, size_t @var{size}) +This function allocates a relocatable block of size @var{size}. It +stores the block's address in @code{*@var{handleptr}} and returns +a non-null pointer to indicate success. + +If @code{r_alloc} can't get the space needed, it stores a null pointer +in @code{*@var{handleptr}}, and returns a null pointer. +@end deftypefun + +@comment malloc.h +@comment GNU +@c @deftypefun void r_alloc_free (void **@var{handleptr}) +This function is the way to free a relocatable block. It frees the +block that @code{*@var{handleptr}} points to, and stores a null pointer +in @code{*@var{handleptr}} to show it doesn't point to an allocated +block any more. +@end deftypefun + +@comment malloc.h +@comment GNU +@c @deftypefun {void *} r_re_alloc (void **@var{handleptr}, size_t @var{size}) +The function @code{r_re_alloc} adjusts the size of the block that +@code{*@var{handleptr}} points to, making it @var{size} bytes long. It +stores the address of the resized block in @code{*@var{handleptr}} and +returns a non-null pointer to indicate success. + +If enough memory is not available, this function returns a null pointer +and does not modify @code{*@var{handleptr}}. +@end deftypefun +@end ignore + + + + +@ignore +@comment No longer available... 
+ +@comment @node Memory Warnings +@comment @section Memory Usage Warnings +@comment @cindex memory usage warnings +@comment @cindex warnings of memory almost full + +@pindex malloc.c +You can ask for warnings as the program approaches running out of memory +space, by calling @code{memory_warnings}. This tells @code{malloc} to +check memory usage every time it asks for more memory from the operating +system. This is a GNU extension declared in @file{malloc.h}. + +@comment malloc.h +@comment GNU +@comment @deftypefun void memory_warnings (void *@var{start}, void (*@var{warn-func}) (const char *)) +Call this function to request warnings for nearing exhaustion of virtual +memory. + +The argument @var{start} says where data space begins, in memory. The +allocator compares this against the last address used and against the +limit of data space, to determine the fraction of available memory in +use. If you supply zero for @var{start}, then a default value is used +which is right in most circumstances. + +For @var{warn-func}, supply a function that @code{malloc} can call to +warn you. It is called with a string (a warning message) as argument. +Normally it ought to display the string for the user to read. +@end deftypefun + +The warnings come when memory becomes 75% full, when it becomes 85% +full, and when it becomes 95% full. Above 95% you get another warning +each time memory usage increases. + +@end ignore diff --git a/REORG.TODO/manual/message.texi b/REORG.TODO/manual/message.texi new file mode 100644 index 0000000000..2dae3edeb9 --- /dev/null +++ b/REORG.TODO/manual/message.texi @@ -0,0 +1,1969 @@ +@node Message Translation, Searching and Sorting, Locales, Top +@c %MENU% How to make the program speak the user's language +@chapter Message Translation + +The program's interface with the user should be designed to ease the user's +task. One way to ease the user's task is to use messages in whatever +language the user prefers. 
+ +Printing messages in different languages can be implemented in different +ways. One could add all the different languages in the source code and +choose among the variants every time a message has to be printed. This is +certainly not a good solution since extending the set of languages is +cumbersome (the code must be changed) and the code itself can become +really big with dozens of message sets. + +A better solution is to keep the message sets for each language +in separate files which are loaded at runtime depending on the language +selection of the user. + +@Theglibc{} provides two different sets of functions to support +message translation. The problem is that neither of the interfaces is +officially defined by the POSIX standard. The @code{catgets} family of +functions is defined in the X/Open standard but this is derived from +industry decisions and therefore not necessarily based on reasonable +decisions. + +As mentioned above, the message catalog handling provides easy +extendability by using external data files which contain the message +translations. I.e., these files contain for each of the messages used +in the program a translation for the appropriate language. So the tasks +of the message handling functions are + +@itemize @bullet +@item +locate the external data file with the appropriate translations +@item +load the data and make it possible to address the messages +@item +map a given key to the translated message +@end itemize + +The two approaches mainly differ in the implementation of this last +step. Decisions made in the last step influence the rest of the design. + +@menu +* Message catalogs a la X/Open:: The @code{catgets} family of functions. +* The Uniforum approach:: The @code{gettext} family of functions. 
+@end menu + + +@node Message catalogs a la X/Open +@section X/Open Message Catalog Handling + +The @code{catgets} functions are based on the simple scheme: + +@quotation +Associate every message to translate in the source code with a unique +identifier. To retrieve a message from a catalog file solely the +identifier is used. +@end quotation + +This means for the author of the program that s/he will have to make +sure the meaning of the identifier in the program code and in the +message catalogs is always the same. + +Before a message can be translated the catalog file must be located. +The user of the program must be able to guide the responsible function +to find whatever catalog the user wants. This is separated from what +the programmer had in mind. + +All the types, constants and functions for the @code{catgets} functions +are defined/declared in the @file{nl_types.h} header file. + +@menu +* The catgets Functions:: The @code{catgets} function family. +* The message catalog files:: Format of the message catalog files. +* The gencat program:: How to generate message catalogs files which + can be used by the functions. +* Common Usage:: How to use the @code{catgets} interface. 
+@end menu + + +@node The catgets Functions +@subsection The @code{catgets} function family + +@comment nl_types.h +@comment X/Open +@deftypefun nl_catd catopen (const char *@var{cat_name}, int @var{flag}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c catopen @mtsenv @ascuheap @acsmem +@c strchr ok +@c setlocale(,NULL) ok +@c getenv @mtsenv +@c strlen ok +@c alloca ok +@c stpcpy ok +@c malloc @ascuheap @acsmem +@c __open_catalog @ascuheap @acsmem +@c strchr ok +@c open_not_cancel_2 @acsfd +@c strlen ok +@c ENOUGH ok +@c alloca ok +@c memcpy ok +@c fxstat64 ok +@c __set_errno ok +@c mmap @acsmem +@c malloc dup @ascuheap @acsmem +@c read_not_cancel ok +@c free dup @ascuheap @acsmem +@c munmap ok +@c close_not_cancel_no_status ok +@c free @ascuheap @acsmem +The @code{catopen} function tries to locate the message data file named +@var{cat_name} and loads it when found. The return value is of an +opaque type and can be used in calls to the other functions to refer to +this loaded catalog. + +The return value is @code{(nl_catd) -1} in case the function failed and +no catalog was loaded. The global variable @var{errno} contains a code +for the error causing the failure. But even if the function call +succeeded this does not mean that all messages can be translated. + +Locating the catalog file must happen in a way which lets the user of +the program influence the decision. It is up to the user to decide +about the language to use and sometimes it is useful to use alternate +catalog files. All this can be specified by the user by setting some +environment variables. + +The first problem is to find out where all the message catalogs are +stored. Every program could have its own place to keep all the +different files but usually the catalog files are grouped by languages +and the catalogs for all programs are kept in the same place. 
+ +@cindex NLSPATH environment variable +To tell the @code{catopen} function where the catalog for the program +can be found the user can set the environment variable @code{NLSPATH} to +a value which describes her/his choice. Since this value must be usable +for different languages and locales it cannot be a simple string. +Instead it is a format string (similar to @code{printf}'s). An example +is + +@smallexample +/usr/share/locale/%L/%N:/usr/share/locale/%L/LC_MESSAGES/%N +@end smallexample + +First one can see that more than one directory can be specified (with +the usual syntax of separating them by colons). The next things to +observe are the format elements, @code{%L} and @code{%N} in this case. +The @code{catopen} function knows about several of them and the +replacement for all of them is of course different. + +@table @code +@item %N +This format element is substituted with the name of the catalog file. +This is the value of the @var{cat_name} argument given to +@code{catopen}. + +@item %L +This format element is substituted with the name of the currently +selected locale for translating messages. How this is determined is +explained below. + +@item %l +(This is the lowercase ell.) This format element is substituted with the +language element of the locale name. The string describing the selected +locale is expected to have the form +@code{@var{lang}[_@var{terr}[.@var{codeset}]]} and this format uses the +first part @var{lang}. + +@item %t +This format element is substituted by the territory part @var{terr} of +the name of the currently selected locale. See the explanation of the +format above. + +@item %c +This format element is substituted by the codeset part @var{codeset} of +the name of the currently selected locale. See the explanation of the +format above. + +@item %% +Since @code{%} is used as a meta character there must be a way to +express the @code{%} character in the result itself. Using @code{%%} +does this just like it works for @code{printf}.
+@end table + + +Using @code{NLSPATH} allows arbitrary directories to be searched for +message catalogs while still allowing different languages to be used. +If the @code{NLSPATH} environment variable is not set, the default value +is + +@smallexample +@var{prefix}/share/locale/%L/%N:@var{prefix}/share/locale/%L/LC_MESSAGES/%N +@end smallexample + +@noindent +where @var{prefix} is given to @code{configure} while installing @theglibc{} +(this value is in many cases @code{/usr} or the empty string). + +The remaining problem is to decide which must be used. The value +decides about the substitution of the format elements mentioned above. +First of all the user can specify a path in the message catalog name +(i.e., the name contains a slash character). In this situation the +@code{NLSPATH} environment variable is not used. The catalog must exist +as specified in the program, perhaps relative to the current working +directory. This situation is not desirable and catalog names should +never be written this way. Besides this, this behavior is not portable +to all other platforms providing the @code{catgets} interface. + +@cindex LC_ALL environment variable +@cindex LC_MESSAGES environment variable +@cindex LANG environment variable +Otherwise the values of environment variables from the standard +environment are examined (@pxref{Standard Environment}). Which +variables are examined is decided by the @var{flag} parameter of +@code{catopen}. If the value is @code{NL_CAT_LOCALE} (which is defined +in @file{nl_types.h}) then the @code{catopen} function uses the name of +the locale currently selected for the @code{LC_MESSAGES} category. + +If @var{flag} is zero the @code{LANG} environment variable is examined. +This is a left-over from the early days when the concept of locales +had not even reached the level of POSIX locales. + +The environment variable and the locale name should have a value of the +form @code{@var{lang}[_@var{terr}[.@var{codeset}]]} as explained above.
+If no environment variable is set the @code{"C"} locale is used which +prevents any translation. + +The return value of the function is in any case a valid string. Either +it is a translation from a message catalog or it is the same as the +@var{string} parameter. So a piece of code to decide whether a +translation actually happened must look like this: + +@smallexample +@{ + char *trans = catgets (desc, set, msg, input_string); + if (trans == input_string) + @{ + /* Something went wrong. */ + @} +@} +@end smallexample + +@noindent +When an error occurs the global variable @var{errno} is set to + +@table @var +@item EBADF +The catalog does not exist. +@item ENOMSG +The set/message tuple does not name an existing element in the +message catalog. +@end table + +While it sometimes can be useful to test for errors programs normally +will avoid any test. If the translation is not available it is no big +problem if the original, untranslated message is printed. Either the +user understands this as well or s/he will look for the reason why the +messages are not translated. +@end deftypefun + +Please note that the currently selected locale does not depend on a call +to the @code{setlocale} function. It is not necessary that the locale +data files for this locale exist and calling @code{setlocale} succeeds. +The @code{catopen} function directly reads the values of the environment +variables. + + +@deftypefun {char *} catgets (nl_catd @var{catalog_desc}, int @var{set}, int @var{message}, const char *@var{string}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{catgets} has to be used to access the message catalog +previously opened using the @code{catopen} function. The +@var{catalog_desc} parameter must be a value previously returned by +@code{catopen}. + +The next two parameters, @var{set} and @var{message}, reflect the +internal organization of the message catalog files. This will be +explained in detail below. 
For now it is interesting to know that a +catalog can consist of several sets and the messages in each set are +individually numbered using numbers. Neither the set number nor the +message number needs to be consecutive. They can be arbitrarily chosen. +But each message (unless equal to another one) must have its own unique +pair of set and message numbers. + +Since it is not guaranteed that the message catalog for the language +selected by the user exists the last parameter @var{string} helps to +handle this case gracefully. If no matching string can be found +@var{string} is returned. This means for the programmer that + +@itemize @bullet +@item +the @var{string} parameters should contain reasonable text (this also +helps to understand the program since otherwise there would be no hint +on the string which is expected to be returned). +@item +all @var{string} arguments should be written in the same language. +@end itemize +@end deftypefun + +It is somewhat uncomfortable to write a program using the @code{catgets} +functions if no supporting functionality is available. Since each +set/message number tuple must be unique the programmer must keep lists +of the messages at the same time the code is written. And the work +between several people working on the same project must be coordinated. +We will see how some of these problems can be relaxed a bit (@pxref{Common +Usage}). + +@deftypefun int catclose (nl_catd @var{catalog_desc}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c catclose @ascuheap @acucorrupt @acsmem +@c __set_errno ok +@c munmap ok +@c free @ascuheap @acsmem +The @code{catclose} function can be used to free the resources +associated with a message catalog which previously was opened by a call +to @code{catopen}. If the resources can be successfully freed the +function returns @code{0}. Otherwise it returns @code{@minus{}1} and the +global variable @var{errno} is set.
Errors can occur if the catalog +descriptor @var{catalog_desc} is not valid in which case @var{errno} is +set to @code{EBADF}. +@end deftypefun + + +@node The message catalog files +@subsection Format of the message catalog files + +The only reasonable way to translate all the messages of a function and +store the result in a message catalog file which can be read by the +@code{catopen} function is to write all the message text to the +translator and let her/him translate them all. I.e., we must have a +file with entries which associate the set/message tuple with a specific +translation. This file format is specified in the X/Open standard and +is as follows: + +@itemize @bullet +@item +Lines containing only whitespace characters or empty lines are ignored. + +@item +Lines which contain as the first non-whitespace character a @code{$} +followed by a whitespace character are comments and are also ignored. + +@item +If a line contains as the first non-whitespace characters the sequence +@code{$set} followed by a whitespace character an additional argument +is required to follow. This argument can either be: + +@itemize @minus +@item +a number. In this case the value of this number determines the set +to which the following messages are added. + +@item +an identifier consisting of alphanumeric characters plus the underscore +character. In this case the set automatically gets a number assigned. +This value is one added to the largest set number which so far appeared. + +How to use the symbolic names is explained in section @ref{Common Usage}. + +It is an error if a symbol name appears more than once. All following +messages are placed in a set with this number. +@end itemize + +@item +If a line contains as the first non-whitespace characters the sequence +@code{$delset} followed by a whitespace character an additional argument +is required to follow. This argument can either be: + +@itemize @minus +@item +a number.
In this case the value of this number determines the set +which will be deleted. + +@item +an identifier consisting of alphanumeric characters plus the underscore +character. This symbolic identifier must match a name for a set which +previously was defined. It is an error if the name is unknown. +@end itemize + +In both cases all messages in the specified set will be removed. They +will not appear in the output. But if this set is later again selected +with a @code{$set} command again messages could be added and these +messages will appear in the output. + +@item +If a line contains after leading whitespaces the sequence +@code{$quote}, the quoting character used for this input file is +changed to the first non-whitespace character following +@code{$quote}. If no non-whitespace character is present before the +line ends quoting is disabled. + +By default no quoting character is used. In this mode strings are +terminated with the first unescaped line break. If there is a +@code{$quote} sequence present newline need not be escaped. Instead a +string is terminated with the first unescaped appearance of the quote +character. + +A common usage of this feature would be to set the quote character to +@code{"}. Then any appearance of the @code{"} in the strings must +be escaped using the backslash (i.e., @code{\"} must be written). + +@item +Any other line must start with a number or an alphanumeric identifier +(with the underscore character included). The following characters +(starting after the first whitespace character) will form the string +which gets associated with the currently selected set and the message +number represented by the number and identifier respectively. + +If the start of the line is a number the message number is obvious. It +is an error if the same message number already appeared for this set. + +If the leading token was an identifier the message number gets +automatically assigned. 
The value is the current maximum message +number for this set plus one. It is an error if the identifier was +already used for a message in this set. It is OK to reuse the +identifier for a message in another set. How to use the symbolic +identifiers will be explained below (@pxref{Common Usage}). There is +one limitation with the identifier: it must not be @code{Set}. The +reason will be explained below. + +The text of the messages can contain escape characters. The usual bunch +of characters known from the @w{ISO C} language are recognized +(@code{\n}, @code{\t}, @code{\v}, @code{\b}, @code{\r}, @code{\f}, +@code{\\}, and @code{\@var{nnn}}, where @var{nnn} is the octal coding of +a character code). +@end itemize + +@strong{Important:} The handling of identifiers instead of numbers for +the set and messages is a GNU extension. Systems strictly following the +X/Open specification do not have this feature. An example for a message +catalog file is this: + +@smallexample +$ This is a leading comment. +$quote " + +$set SetOne +1 Message with ID 1. +two " Message with ID \"two\", which gets the value 2 assigned" + +$set SetTwo +$ Since the last set got the number 1 assigned this set has number 2. +4000 "The numbers can be arbitrary, they need not start at one." +@end smallexample + +This small example shows various aspects: +@itemize @bullet +@item +Lines 1 and 9 are comments since they start with @code{$} followed by +a whitespace. +@item +The quoting character is set to @code{"}. Otherwise the quotes in the +message definition would have to be omitted and in this case the +message with the identifier @code{two} would lose its leading whitespace. +@item +Mixing numbered messages with messages having symbolic names is no +problem and the numbering happens automatically. +@end itemize + + +While this file format is pretty easy it is not the best possible for +use in a running program.
The @code{catopen} function would have to +parse the file and handle syntactic errors gracefully. This is not so +easy and the whole process is pretty slow. Therefore the @code{catgets} +functions expect the data in another more compact and ready-to-use file +format. There is a special program @code{gencat} which is explained in +detail in the next section. + +Files in this other format are not human readable. To be easy to use by +programs it is a binary file. But the format is byte order independent +so translation files can be shared by systems of arbitrary architecture +(as long as they use @theglibc{}). + +Details about the binary file format are not important to know since +these files are always created by the @code{gencat} program. The +sources of @theglibc{} also provide the sources for the +@code{gencat} program and so the interested reader can look through +these source files to learn about the file format. + + +@node The gencat program +@subsection Generate Message Catalogs files + +@cindex gencat +The @code{gencat} program is specified in the X/Open standard and the +GNU implementation follows this specification and so processes +all correctly formed input files. Additionally some extension are +implemented which help to work in a more reasonable way with the +@code{catgets} functions. + +The @code{gencat} program can be invoked in two ways: + +@example +`gencat [@var{Option} @dots{}] [@var{Output-File} [@var{Input-File} @dots{}]]` +@end example + +This is the interface defined in the X/Open standard. If no +@var{Input-File} parameter is given, input will be read from standard +input. Multiple input files will be read as if they were concatenated. +If @var{Output-File} is also missing, the output will be written to +standard output. To provide the interface one is used to from other +programs a second interface is provided. 
+ +@smallexample +`gencat [@var{Option} @dots{}] -o @var{Output-File} [@var{Input-File} @dots{}]` +@end smallexample + +The option @samp{-o} is used to specify the output file and all file +arguments are used as input files. + +Beside this one can use @file{-} or @file{/dev/stdin} for +@var{Input-File} to denote the standard input. Corresponding one can +use @file{-} and @file{/dev/stdout} for @var{Output-File} to denote +standard output. Using @file{-} as a file name is allowed in X/Open +while using the device names is a GNU extension. + +The @code{gencat} program works by concatenating all input files and +then @strong{merging} the resulting collection of message sets with a +possibly existing output file. This is done by removing all messages +with set/message number tuples matching any of the generated messages +from the output file and then adding all the new messages. To +regenerate a catalog file while ignoring the old contents therefore +requires removing the output file if it exists. If the output is +written to standard output no merging takes place. + +@noindent +The following table shows the options understood by the @code{gencat} +program. The X/Open standard does not specify any options for the +program so all of these are GNU extensions. + +@table @samp +@item -V +@itemx --version +Print the version information and exit. +@item -h +@itemx --help +Print a usage message listing all available options, then exit successfully. +@item --new +Do not merge the new messages from the input files with the old content +of the output file. The old content of the output file is discarded. +@item -H +@itemx --header=name +This option is used to emit the symbolic names given to sets and +messages in the input files for use in the program. Details about how +to use this are given in the next section. The @var{name} parameter to +this option specifies the name of the output file. 
It will contain a +number of C preprocessor @code{#define}s to associate a name with a +number. + +Please note that the generated file only contains the symbols from the +input files. If the output is merged with the previous content of the +output file the possibly existing symbols from the file(s) which +generated the old output files are not in the generated header file. +@end table + + +@node Common Usage +@subsection How to use the @code{catgets} interface + +The @code{catgets} functions can be used in two different ways. By +following slavishly the X/Open specs and not relying on the extension +and by using the GNU extensions. We will take a look at the former +method first to understand the benefits of extensions. + +@subsubsection Not using symbolic names + +Since the X/Open format of the message catalog files does not allow +symbol names we have to work with numbers all the time. When we start +writing a program we have to replace all appearances of translatable +strings with something like + +@smallexample +catgets (catdesc, set, msg, "string") +@end smallexample + +@noindent +@var{catdesc} is retrieved from a call to @code{catopen} which is +normally done once at the program start. The @code{"string"} is the +string we want to translate. The problems start with the set and +message numbers. + +In a bigger program several programmers usually work at the same time on +the program and so coordinating the number allocation is crucial. +Though no two different strings must be indexed by the same tuple of +numbers it is highly desirable to reuse the numbers for equal strings +with equal translations (please note that there might be strings which +are equal in one language but have different translations due to +different contexts). + +The allocation process can be relaxed a bit by different set numbers for +different parts of the program. So the number of developers who have to +coordinate the allocation can be reduced.
But still lists must be kept to +track the allocation and errors can easily happen. These errors +cannot be discovered by the compiler or the @code{catgets} functions. +Only the user of the program might see wrong messages printed. In the +worst cases the messages are so irritating that they cannot be +recognized as wrong. Think about the translations for @code{"true"} and +@code{"false"} being exchanged. This could result in a disaster. + + +@subsubsection Using symbolic names + +The problems mentioned in the last section derive from the fact that: + +@enumerate +@item +the numbers are allocated once and due to the possibly frequent use of +them it is difficult to change a number later. +@item +the numbers do not allow guessing anything about the string and +therefore collisions can easily happen. +@end enumerate + +By constantly using symbolic names and by providing a method which maps +the string content to a symbolic name (however this will happen) one can +prevent both problems above. The cost of this is that the programmer +has to write a complete message catalog file while s/he is writing the +program itself. + +This is necessary since the symbolic names must be mapped to numbers +before the program sources can be compiled. In the last section it was +described how to generate a header containing the mapping of the names. +E.g., for the example message file given in the last section we could +call the @code{gencat} program as follows (assume @file{ex.msg} contains +the sources). + +@smallexample +gencat -H ex.h -o ex.cat ex.msg +@end smallexample + +@noindent +This generates a header file with the following content: + +@smallexample +#define SetTwoSet 0x2 /* ex.msg:8 */ + +#define SetOneSet 0x1 /* ex.msg:4 */ +#define SetOnetwo 0x2 /* ex.msg:6 */ +@end smallexample + +As can be seen the various symbols given in the source file are mangled +to generate unique identifiers and these identifiers get numbers +assigned.
Reading the source file and knowing about the rules will +allow to predict the content of the header file (it is deterministic) +but this is not necessary. The @code{gencat} program can take care of +everything. All the programmer has to do is to put the generated header +file in the dependency list of the source files of her/his project and +add a rule to regenerate the header if any of the input files change. + +One word about the symbol mangling. Every symbol consists of two parts: +the name of the message set plus the name of the message or the special +string @code{Set}. So @code{SetOnetwo} means this macro can be used to +access the translation with identifier @code{two} in the message set +@code{SetOne}. + +The other names denote the names of the message sets. The special +string @code{Set} is used in the place of the message identifier. + +If in the code the second string of the set @code{SetOne} is used the C +code should look like this: + +@smallexample +catgets (catdesc, SetOneSet, SetOnetwo, + " Message with ID \"two\", which gets the value 2 assigned") +@end smallexample + +Writing the function this way will allow to change the message number +and even the set number without requiring any change in the C source +code. (The text of the string is normally not the same; this is only +for this example.) + + +@subsubsection How does this allow to develop + +To illustrate the usual way to work with the symbolic version numbers +here is a little example. Assume we want to write the very complex and +famous greeting program. We start by writing the code as usual: + +@smallexample +#include <stdio.h> +int +main (void) +@{ + printf ("Hello, world!\n"); + return 0; +@} +@end smallexample + +Now we want to internationalize the message and therefore replace the +message with whatever the user wants.
+ +@smallexample +#include <nl_types.h> +#include <stdio.h> +#include "msgnrs.h" +int +main (void) +@{ + nl_catd catdesc = catopen ("hello.cat", NL_CAT_LOCALE); + printf (catgets (catdesc, MainSet, MainHello, + "Hello, world!\n")); + catclose (catdesc); + return 0; +@} +@end smallexample + +We see how the catalog object is opened and the returned descriptor used +in the other function calls. It is not really necessary to check for +failure of any of the functions since even in these situations the +functions will behave reasonably. They simply will not return a +translation. + +What remains unspecified here are the constants @code{MainSet} and +@code{MainHello}. These are the symbolic names describing the +message. To get the actual definitions which match the information in +the catalog file we have to create the message catalog source file and +process it using the @code{gencat} program. + +@smallexample +$ Messages for the famous greeting program. +$quote " + +$set Main +Hello "Hallo, Welt!\n" +@end smallexample + +Now we can start building the program (assume the message catalog source +file is named @file{hello.msg} and the program source file @file{hello.c}): + +@smallexample +% gencat -H msgnrs.h -o hello.cat hello.msg +% cat msgnrs.h +#define MainSet 0x1 /* hello.msg:4 */ +#define MainHello 0x1 /* hello.msg:5 */ +% gcc -o hello hello.c -I. +% cp hello.cat /usr/share/locale/de/LC_MESSAGES +% echo $LC_ALL +de +% ./hello +Hallo, Welt! +% +@end smallexample + +The call of the @code{gencat} program creates the missing header file +@file{msgnrs.h} as well as the message catalog binary. The former is +used in the compilation of @file{hello.c} while the latter is placed in a +directory in which the @code{catopen} function will try to locate it. +Please check the @code{LC_ALL} environment variable and the default path +for @code{catopen} presented in the description above.
+ + +@node The Uniforum approach +@section The Uniforum approach to Message Translation + +Sun Microsystems tried to standardize a different approach to message +translation in the Uniforum group. There never was a real standard +defined but still the interface was used in Sun's operating systems. +Since this approach fits better in the development process of free +software it is also used throughout the GNU project and the GNU +@file{gettext} package provides support for this outside @theglibc{}. + +The code of the @file{libintl} from GNU @file{gettext} is the same as +the code in @theglibc{}. So the documentation in the GNU +@file{gettext} manual is also valid for the functionality here. The +following text will describe the library functions in detail. But the +numerous helper programs are not described in this manual. Instead +people should read the GNU @file{gettext} manual +(@pxref{Top,,GNU gettext utilities,gettext,Native Language Support Library and Tools}). +We will only give a short overview. + +Though the @code{catgets} functions are available by default on more +systems the @code{gettext} interface is at least as portable as the +former. The GNU @file{gettext} package can be used wherever the +functions are not available. + + +@menu +* Message catalogs with gettext:: The @code{gettext} family of functions. +* Helper programs for gettext:: Programs to handle message catalogs + for @code{gettext}. +@end menu + + +@node Message catalogs with gettext +@subsection The @code{gettext} family of functions + +The paradigm underlying the @code{gettext} approach to message +translation is different from that of the @code{catgets} functions but the +basic functionality is equivalent. There are functions of the following +categories: + +@menu +* Translation with gettext:: What has to be done to translate a message. +* Locating gettext catalog:: How to determine which catalog to be used.
+* Advanced gettext functions:: Additional functions for more complicated + situations. +* Charset conversion in gettext:: How to specify the output character set + @code{gettext} uses. +* GUI program problems:: How to use @code{gettext} in GUI programs. +* Using gettextized software:: The possibilities of the user to influence + the way @code{gettext} works. +@end menu + +@node Translation with gettext +@subsubsection What has to be done to translate a message? + +The @code{gettext} functions have a very simple interface. The most +basic function just takes the string which shall be translated as the +argument and it returns the translation. This is fundamentally +different from the @code{catgets} approach where an extra key is +necessary and the original string is only used for the error case. + +If the string which has to be translated is the only argument this of +course means the string itself is the key. I.e., the translation will +be selected based on the original string. The message catalogs must +therefore contain the original strings plus one translation for any such +string. The task of the @code{gettext} function is to compare the +argument string with the available strings in the catalog and return the +appropriate translation. Of course this process is optimized so that +this process is not more expensive than an access using an atomic key +like in @code{catgets}. + +The @code{gettext} approach has some advantages but also some +disadvantages. Please see the GNU @file{gettext} manual for a detailed +discussion of the pros and cons. + +All the definitions and declarations for @code{gettext} can be found in +the @file{libintl.h} header file. On systems where these functions are +not part of the C library they can be found in a separate library named +@file{libintl.a} (or accordingly different for shared libraries). 
+ +@comment libintl.h +@comment GNU +@deftypefun {char *} gettext (const char *@var{msgid}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Wrapper for dcgettext. +The @code{gettext} function searches the currently selected message +catalogs for a string which is equal to @var{msgid}. If there is such a +string available it is returned. Otherwise the argument string +@var{msgid} is returned. + +Please note that although the return value is @code{char *} the +returned string must not be changed. This broken type results from the +history of the function and does not reflect the way the function should +be used. + +Please note that above we wrote ``message catalogs'' (plural). This is +a specialty of the GNU implementation of these functions and we will +say more about this when we talk about the ways message catalogs are +selected (@pxref{Locating gettext catalog}). + +The @code{gettext} function does not modify the value of the global +@var{errno} variable. This is necessary to make it possible to write +something like + +@smallexample + printf (gettext ("Operation failed: %m\n")); +@end smallexample + +Here the @var{errno} value is used in the @code{printf} function while +processing the @code{%m} format element and if the @code{gettext} +function would change this value (it is called before @code{printf} is +called) we would get a wrong message. + +So there is no easy way to detect a missing message catalog besides +comparing the argument string with the result. But it is normally the +task of the user to react on missing catalogs. The program cannot guess +when a message catalog is really necessary since for a user who speaks +the language the program was developed in, the message does not need any translation. 
+@end deftypefun + +The remaining two functions to access the message catalog add some +functionality to select a message catalog which is not the default one. +This is important if parts of the program are developed independently. +Every part can have its own message catalog and all of them can be used +at the same time. The C library itself is an example: internally it +uses the @code{gettext} functions but since it must not depend on a +currently selected default message catalog it must specify all ambiguous +information. + +@comment libintl.h +@comment GNU +@deftypefun {char *} dgettext (const char *@var{domainname}, const char *@var{msgid}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Wrapper for dcgettext. +The @code{dgettext} function acts just like the @code{gettext} +function. It only takes an additional first argument @var{domainname} +which guides the selection of the message catalogs which are searched +for the translation. If the @var{domainname} parameter is the null +pointer the @code{dgettext} function is exactly equivalent to +@code{gettext} since the default value for the domain name is used. + +As for @code{gettext} the return value type is @code{char *} which is an +anachronism. The returned string must never be modified. 
+@end deftypefun + +@comment libintl.h +@comment GNU +@deftypefun {char *} dcgettext (const char *@var{domainname}, const char *@var{msgid}, int @var{category}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c dcgettext @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c dcigettext @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c libc_rwlock_rdlock @asulock @aculock +@c current_locale_name ok [protected from @mtslocale] +@c tfind ok +@c libc_rwlock_unlock ok +@c plural_lookup ok +@c plural_eval ok +@c rawmemchr ok +@c DETERMINE_SECURE ok, nothing +@c strcmp ok +@c strlen ok +@c getcwd @ascuheap @acsmem @acsfd +@c strchr ok +@c stpcpy ok +@c category_to_name ok +@c guess_category_value @mtsenv +@c getenv @mtsenv +@c current_locale_name dup ok [protected from @mtslocale by dcigettext] +@c strcmp ok +@c ENABLE_SECURE ok +@c _nl_find_domain @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c libc_rwlock_rdlock dup @asulock @aculock +@c _nl_make_l10nflist dup @ascuheap @acsmem +@c libc_rwlock_unlock dup ok +@c _nl_load_domain @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock_recursive @aculock +@c libc_lock_unlock_recursive @aculock +@c open->open_not_cancel_2 @acsfd +@c fstat ok +@c mmap dup @acsmem +@c close->close_not_cancel_no_status @acsfd +@c malloc dup @ascuheap @acsmem +@c read->read_not_cancel ok +@c munmap dup @acsmem +@c W dup ok +@c strlen dup ok +@c get_sysdep_segment_value ok +@c memcpy dup ok +@c hash_string dup ok +@c free dup @ascuheap @acsmem +@c libc_rwlock_init ok +@c _nl_find_msg dup @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c libc_rwlock_fini ok +@c EXTRACT_PLURAL_EXPRESSION @ascuheap @acsmem +@c strstr dup ok +@c isspace 
ok +@c strtoul ok +@c PLURAL_PARSE @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c INIT_GERMANIC_PLURAL ok, nothing +@c the pre-C99 variant is @acucorrupt [protected from @mtuinit by dcigettext] +@c _nl_expand_alias dup @ascuheap @asulock @acsmem @acsfd @aculock +@c _nl_explode_name dup @ascuheap @acsmem +@c libc_rwlock_wrlock dup @asulock @aculock +@c free dup @asulock @aculock @acsfd @acsmem +@c _nl_find_msg @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c _nl_load_domain dup @mtsenv @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsfd @acsmem +@c strlen ok +@c hash_string ok +@c W ok +@c SWAP ok +@c bswap_32 ok +@c strcmp ok +@c get_output_charset @mtsenv @ascuheap @acsmem +@c getenv dup @mtsenv +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c memcpy dup ok +@c libc_rwlock_rdlock dup @asulock @aculock +@c libc_rwlock_unlock dup ok +@c libc_rwlock_wrlock dup @asulock @aculock +@c realloc @ascuheap @acsmem +@c strdup @ascuheap @acsmem +@c strstr ok +@c strcspn ok +@c mempcpy dup ok +@c norm_add_slashes dup ok +@c gconv_open @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c [protected from @mtslocale by dcigettext locale lock] +@c free dup @ascuheap @acsmem +@c libc_lock_lock @asulock @aculock +@c calloc @ascuheap @acsmem +@c gconv dup @acucorrupt [protected from @mtsrace and @asucorrupt by lock] +@c libc_lock_unlock ok +@c malloc @ascuheap @acsmem +@c mempcpy ok +@c memcpy ok +@c strcpy ok +@c libc_rwlock_wrlock @asulock @aculock +@c tsearch @ascuheap @acucorrupt @acsmem [protected from @mtsrace and @asucorrupt] +@c transcmp ok +@c strmp dup ok +@c free @ascuheap @acsmem +The @code{dcgettext} adds another argument to those which +@code{dgettext} takes. This argument @var{category} specifies the last +piece of information needed to localize the message catalog. 
I.e., the +domain name and the locale category exactly specify which message +catalog has to be used (relative to a given directory, see below). + +The @code{dgettext} function can be expressed in terms of +@code{dcgettext} by using + +@smallexample +dcgettext (domain, string, LC_MESSAGES) +@end smallexample + +@noindent +instead of + +@smallexample +dgettext (domain, string) +@end smallexample + +This also shows which values are expected for the third parameter. One +has to use the available selectors for the categories available in +@file{locale.h}. Normally the available values are @code{LC_CTYPE}, +@code{LC_COLLATE}, @code{LC_MESSAGES}, @code{LC_MONETARY}, +@code{LC_NUMERIC}, and @code{LC_TIME}. Please note that @code{LC_ALL} +must not be used and even though the names might suggest this, there is +no relation to the environment variable of this name. + +The @code{dcgettext} function is only implemented for compatibility with +other systems which have @code{gettext} functions. There is not really +any situation where it is necessary (or useful) to use a different value +than @code{LC_MESSAGES} for the @var{category} parameter. We are +dealing with messages here and any other choice can only be irritating. + +As for @code{gettext} the return value type is @code{char *} which is an +anachronism. The returned string must never be modified. +@end deftypefun + +When using the three functions above in a program it is a frequent case +that the @var{msgid} argument is a constant string. So it is worthwhile to +optimize this case. Thinking shortly about this one will realize that +as long as no new message catalog is loaded the translation of a message +will not change. This optimization is actually implemented by the +@code{gettext}, @code{dgettext} and @code{dcgettext} functions. 
+ + +@node Locating gettext catalog +@subsubsection How to determine which catalog to be used + +The functions to retrieve the translations for a given message have a +remarkably simple interface. But to provide the user of the program +still the opportunity to select exactly the translation s/he wants and +also to provide the programmer the possibility to influence the way to +locate the search for catalog files there is a quite complicated +underlying mechanism which controls all this. The code is complicated but +the use is easy. + +Basically we have two different tasks to perform which can also be +performed by the @code{catgets} functions: + +@enumerate +@item +Locate the set of message catalogs. There are a number of files for +different languages which all belong to the package. Usually they +are all stored in the filesystem below a certain directory. + +There can be arbitrarily many packages installed and they can follow +different guidelines for the placement of their files. + +@item +Relative to the location specified by the package the actual translation +files must be searched, based on the wishes of the user. I.e., for each +language the user selects the program should be able to locate the +appropriate file. +@end enumerate + +This is the functionality required by the specifications for +@code{gettext} and this is also what the @code{catgets} functions are +able to do. But there are some problems unresolved: + +@itemize @bullet +@item +The language to be used can be specified in several different ways. +There is no generally accepted standard for this and the user always +expects the program to understand what s/he means. E.g., to select the +German translation one could write @code{de}, @code{german}, or +@code{deutsch} and the program should always react the same. + +@item +Sometimes the specification of the user is too detailed.
If s/he, e.g., +specifies @code{de_DE.ISO-8859-1} which means German, spoken in Germany, +coded using the @w{ISO 8859-1} character set there is the possibility +that a message catalog matching this exactly is not available. But +there could be a catalog matching @code{de} and if the character set +used on the machine is always @w{ISO 8859-1} there is no reason why this +later message catalog should not be used. (We call this @dfn{message +inheritance}.) + +@item +If a catalog for a wanted language is not available it is not always the +second best choice to fall back on the language of the developer and +simply not translate any message. Instead a user might be better able +to read the messages in another language and so the user of the program +should be able to define a precedence order of languages. +@end itemize + +We can divide the configuration actions in two parts: the one is +performed by the programmer, the other by the user. We will start with +the functions the programmer can use since the user configuration will +be based on this. + +As the functions described in the last sections already mention separate +sets of messages can be selected by a @dfn{domain name}. This is a +simple string which should be unique for each program part that uses a +separate domain. It is possible to use in one program arbitrarily many +domains at the same time. E.g., @theglibc{} itself uses a domain +named @code{libc} while the program using the C Library could use a +domain named @code{foo}. The important point is that at any time +exactly one domain is active. This is controlled with the following +function. 
+ +@comment libintl.h +@comment GNU +@deftypefun {char *} textdomain (const char *@var{domainname}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +@c textdomain @asulock @ascuheap @aculock @acsmem +@c libc_rwlock_wrlock @asulock @aculock +@c strcmp ok +@c strdup @ascuheap @acsmem +@c free @ascuheap @acsmem +@c libc_rwlock_unlock ok +The @code{textdomain} function sets the default domain, which is used in +all future @code{gettext} calls, to @var{domainname}. Please note that +@code{dgettext} and @code{dcgettext} calls are not influenced if the +@var{domainname} parameter of these functions is not the null pointer. + +Before the first call to @code{textdomain} the default domain is +@code{messages}. This is the name specified in the specification of +the @code{gettext} API. This name is as good as any other name. No +program should ever really use a domain with this name since this can +only lead to problems. + +The function returns the value which is from now on taken as the default +domain. If the system went out of memory the returned value is +@code{NULL} and the global variable @var{errno} is set to @code{ENOMEM}. +Despite the return value type being @code{char *} the return string must +not be changed. It is allocated internally by the @code{textdomain} +function. + +If the @var{domainname} parameter is the null pointer no new default +domain is set. Instead the currently selected default domain is +returned. + +If the @var{domainname} parameter is the empty string the default domain +is reset to its initial value, the domain with the name @code{messages}. +This possibility is questionable to use since the domain @code{messages} +really never should be used. 
+@end deftypefun + +@comment libintl.h +@comment GNU +@deftypefun {char *} bindtextdomain (const char *@var{domainname}, const char *@var{dirname}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c bindtextdomain @ascuheap @acsmem +@c set_binding_values @ascuheap @acsmem +@c libc_rwlock_wrlock dup @asulock @aculock +@c strcmp dup ok +@c strdup dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +The @code{bindtextdomain} function can be used to specify the directory +which contains the message catalogs for domain @var{domainname} for the +different languages. To be correct, this is the directory where the +hierarchy of directories is expected. Details are explained below. + +For the programmer it is important to note that the translations which +come with the program have to be placed in a directory hierarchy starting +at, say, @file{/foo/bar}. Then the program should make a +@code{bindtextdomain} call to bind the domain for the current program to +this directory. So it is made sure the catalogs are found. A correctly +running program does not depend on the user setting an environment +variable. + +The @code{bindtextdomain} function can be used several times and if the +@var{domainname} argument is different the previously bound domains +will not be overwritten. + +If a program which uses @code{bindtextdomain} at some point in +time uses the @code{chdir} function to change the current working +directory it is important that the @var{dirname} string be an +absolute pathname. Otherwise the addressed directory might vary over +time. + +If the @var{dirname} parameter is the null pointer @code{bindtextdomain} +returns the currently selected directory for the domain with the name +@var{domainname}. + +The @code{bindtextdomain} function returns a pointer to a string +containing the name of the selected directory.
The string is +allocated internally in the function and must not be changed by the +user. If the system went out of core during the execution of +@code{bindtextdomain} the return value is @code{NULL} and the global +variable @var{errno} is set accordingly. +@end deftypefun + + +@node Advanced gettext functions +@subsubsection Additional functions for more complicated situations + +The functions of the @code{gettext} family described so far (and all the +@code{catgets} functions as well) have one problem in the real world +which has been neglected completely in all existing approaches. What +is meant here is the handling of plural forms. + +Looking through Unix source code before the time anybody thought about +internationalization (and, sadly, even afterwards) one can often find +code similar to the following: + +@smallexample + printf ("%d file%s deleted", n, n == 1 ? "" : "s"); +@end smallexample + +@noindent +After the first complaints from people internationalizing the code people +either completely avoided formulations like this or used strings like +@code{"file(s)"}. Both look unnatural and should be avoided. First +tries to solve the problem correctly looked like this: + +@smallexample + if (n == 1) + printf ("%d file deleted", n); + else + printf ("%d files deleted", n); +@end smallexample + +But this does not solve the problem. It helps languages where the +plural form of a noun is not simply constructed by adding an `s' but +that is all. Once again people fell into the trap of believing the +rules their language uses are universal. But the handling of plural +forms differs widely between the language families. There are two +things which can differ between (and even inside) language families: + +@itemize @bullet +@item +The way plural forms are built differs. This is a problem with +languages which have many irregularities. German, for instance, is a +drastic case.
Though English and German are part of the same language +family (Germanic), the almost regular forming of plural noun forms +(appending an `s') is hardly found in German. + +@item +The number of plural forms differs. This is somewhat surprising for +those who only have experience with Romance and Germanic languages +since here the number is the same (there are two). + +But other language families have only one form or many forms. More +information on this in an extra section. +@end itemize + +The consequence of this is that application writers should not try to +solve the problem in their code. This would be localization since it is +only usable for certain, hardcoded language environments. Instead the +extended @code{gettext} interface should be used. + +These extra functions take, instead of the one key string, two +strings and a numerical argument. The idea behind this is that using +the numerical argument and the first string as a key, the implementation +can select using rules specified by the translator the right plural +form. The two string arguments then will be used to provide a return +value in case no message catalog is found (similar to the normal +@code{gettext} behavior). In this case the rules for Germanic languages +are used and it is assumed that the first string argument is the singular +form, the second the plural form. + +This has the consequence that programs without language catalogs can +display the correct strings only if the program itself is written using +a Germanic language. This is a limitation but since @theglibc{} +(as well as the GNU @code{gettext} package) is written as part of the +GNU package and the coding standards for the GNU project require programs +to be written in English, this solution nevertheless fulfills its +purpose.
+ +@comment libintl.h +@comment GNU +@deftypefun {char *} ngettext (const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Wrapper for dcngettext. +The @code{ngettext} function is similar to the @code{gettext} function +as it finds the message catalogs in the same way. But it takes two +extra arguments. The @var{msgid1} parameter must contain the singular +form of the string to be converted. It is also used as the key for the +search in the catalog. The @var{msgid2} parameter is the plural form. +The parameter @var{n} is used to determine the plural form. If no +message catalog is found @var{msgid1} is returned if @code{n == 1}, +otherwise @code{msgid2}. + +An example for the use of this function is: + +@smallexample + printf (ngettext ("%d file removed", "%d files removed", n), n); +@end smallexample + +Please note that the numeric value @var{n} has to be passed to the +@code{printf} function as well. It is not sufficient to pass it only to +@code{ngettext}. +@end deftypefun + +@comment libintl.h +@comment GNU +@deftypefun {char *} dngettext (const char *@var{domain}, const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Wrapper for dcngettext. +The @code{dngettext} is similar to the @code{dgettext} function in the +way the message catalog is selected. The difference is that it takes +two extra parameters to provide the correct plural form. These two +parameters are handled in the same way @code{ngettext} handles them. 
+@end deftypefun + +@comment libintl.h +@comment GNU +@deftypefun {char *} dcngettext (const char *@var{domain}, const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}, int @var{category}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Wrapper for dcigettext. +The @code{dcngettext} is similar to the @code{dcgettext} function in the +way the message catalog is selected. The difference is that it takes +two extra parameters to provide the correct plural form. These two +parameters are handled in the same way @code{ngettext} handles them. +@end deftypefun + +@subsubheading The problem of plural forms + +A description of the problem can be found at the beginning of the last +section. Now there is the question how to solve it. Without the input +of linguists (which was not available) it was not possible to determine +whether there are only a few different forms in which plural forms are +formed or whether the number can increase with every new supported +language. + +Therefore the solution implemented is to allow the translator to specify +the rules of how to select the plural form. Since the formula varies +with every language this is the only viable solution except for +hardcoding the information in the code (which still would require the +possibility of extensions to not prevent the use of new languages). The +details are explained in the GNU @code{gettext} manual. Here only a +bit of information is provided. + +The information about the plural form selection has to be stored in the +header entry (the one with the empty @code{msgid} string). It looks +like this: + +@smallexample +Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1; +@end smallexample + +The @code{nplurals} value must be a decimal number which specifies how +many different plural forms exist for this language. 
The string +following @code{plural} is an expression using the C language +syntax. Exceptions are that no negative numbers are allowed, numbers +must be decimal, and the only variable allowed is @code{n}. This +expression will be evaluated whenever one of the functions +@code{ngettext}, @code{dngettext}, or @code{dcngettext} is called. The +numeric value passed to these functions is then substituted for all uses +of the variable @code{n} in the expression. The resulting value then +must be greater than or equal to zero and smaller than the value given as the +value of @code{nplurals}. + +@noindent +The following rules are known at this point. The languages are listed +with their families. But this does not necessarily mean the information can be +generalized for the whole family (as can be easily seen in the table +below).@footnote{Additions are welcome. Send appropriate information to +@email{bug-glibc-manual@@gnu.org}.} + +@table @asis +@item Only one form: +Some languages only require one single form. There is no distinction +between the singular and plural form. An appropriate header entry +would look like this: + +@smallexample +Plural-Forms: nplurals=1; plural=0; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Finno-Ugric family +Hungarian +@item Asian family +Japanese, Korean +@item Turkic/Altaic family +Turkish +@end table + +@item Two forms, singular used for one only +This is the form used in most existing programs since it is what English +uses. A header entry would look like this: + +@smallexample +Plural-Forms: nplurals=2; plural=n != 1; +@end smallexample + +(Note: this uses the feature of C expressions that boolean expressions +have the value zero or one.)
+ +@noindent +Languages with this property include: + +@table @asis +@item Germanic family +Danish, Dutch, English, German, Norwegian, Swedish +@item Finno-Ugric family +Estonian, Finnish +@item Latin/Greek family +Greek +@item Semitic family +Hebrew +@item Romance family +Italian, Portuguese, Spanish +@item Artificial +Esperanto +@end table + +@item Two forms, singular used for zero and one +Exceptional case in the language family. The header entry would be: + +@smallexample +Plural-Forms: nplurals=2; plural=n>1; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Romanic family +French, Brazilian Portuguese +@end table + +@item Three forms, special case for zero +The header entry would be: + +@smallexample +Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Baltic family +Latvian +@end table + +@item Three forms, special cases for one and two +The header entry would be: + +@smallexample +Plural-Forms: nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Celtic +Gaeilge (Irish) +@end table + +@item Three forms, special case for numbers ending in 1[2-9] +The header entry would look like this: + +@smallexample +Plural-Forms: nplurals=3; \ + plural=n%10==1 && n%100!=11 ? 0 : \ + n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Baltic family +Lithuanian +@end table + +@item Three forms, special cases for numbers ending in 1 and 2, 3, 4, except those ending in 1[1-4] +The header entry would look like this: + +@smallexample +Plural-Forms: nplurals=3; \ + plural=n%100/10==1 ? 2 : n%10==1 ? 0 : (n+9)%10>3 ? 
2 : 1; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Slavic family +Croatian, Czech, Russian, Ukrainian +@end table + +@item Three forms, special cases for 1 and 2, 3, 4 +The header entry would look like this: + +@smallexample +Plural-Forms: nplurals=3; \ + plural=(n==1) ? 1 : (n>=2 && n<=4) ? 2 : 0; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Slavic family +Slovak +@end table + +@item Three forms, special case for one and some numbers ending in 2, 3, or 4 +The header entry would look like this: + +@smallexample +Plural-Forms: nplurals=3; \ + plural=n==1 ? 0 : \ + n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Slavic family +Polish +@end table + +@item Four forms, special case for one and all numbers ending in 02, 03, or 04 +The header entry would look like this: + +@smallexample +Plural-Forms: nplurals=4; \ + plural=n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3; +@end smallexample + +@noindent +Languages with this property include: + +@table @asis +@item Slavic family +Slovenian +@end table +@end table + + +@node Charset conversion in gettext +@subsubsection How to specify the output character set @code{gettext} uses + +@code{gettext} not only looks up a translation in a message catalog, it +also converts the translation on the fly to the desired output character +set. This is useful if the user is working in a different character set +than the translator who created the message catalog, because it avoids +distributing variants of message catalogs which differ only in the +character set. + +The output character set is, by default, the value of @code{nl_langinfo +(CODESET)}, which depends on the @code{LC_CTYPE} part of the current +locale. But programs which store strings in a locale independent way +(e.g. 
UTF-8) can request that @code{gettext} and related functions +return the translations in that encoding, by use of the +@code{bind_textdomain_codeset} function. + +Note that the @var{msgid} argument to @code{gettext} is not subject to +character set conversion. Also, when @code{gettext} does not find a +translation for @var{msgid}, it returns @var{msgid} unchanged -- +independently of the current output character set. It is therefore +recommended that all @var{msgid}s be US-ASCII strings. + +@comment libintl.h +@comment GNU +@deftypefun {char *} bind_textdomain_codeset (const char *@var{domainname}, const char *@var{codeset}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c bind_textdomain_codeset @ascuheap @acsmem +@c set_binding_values dup @ascuheap @acsmem +The @code{bind_textdomain_codeset} function can be used to specify the +output character set for message catalogs for domain @var{domainname}. +The @var{codeset} argument must be a valid codeset name which can be used +for the @code{iconv_open} function, or a null pointer. + +If the @var{codeset} parameter is the null pointer, +@code{bind_textdomain_codeset} returns the currently selected codeset +for the domain with the name @var{domainname}. It returns @code{NULL} if +no codeset has yet been selected. + +The @code{bind_textdomain_codeset} function can be used several times. +If used multiple times with the same @var{domainname} argument, the +later call overrides the settings made by the earlier one. + +The @code{bind_textdomain_codeset} function returns a pointer to a +string containing the name of the selected codeset. The string is +allocated internally in the function and must not be changed by the +user. If the system went out of core during the execution of +@code{bind_textdomain_codeset}, the return value is @code{NULL} and the +global variable @var{errno} is set accordingly. 
+@end deftypefun + + +@node GUI program problems +@subsubsection How to use @code{gettext} in GUI programs + +One place where the @code{gettext} functions, if used normally, have big +problems is within programs with graphical user interfaces (GUIs). The +problem is that many of the strings which have to be translated are very +short. They have to appear in pull-down menus which restricts the +length. But strings which are not containing entire sentences or at +least large fragments of a sentence may appear in more than one +situation in the program but might have different translations. This is +especially true for the one-word strings which are frequently used in +GUI programs. + +As a consequence many people say that the @code{gettext} approach is +wrong and instead @code{catgets} should be used which indeed does not +have this problem. But there is a very simple and powerful method to +handle these kind of problems with the @code{gettext} functions. + +@noindent +As an example consider the following fictional situation. A GUI program +has a menu bar with the following entries: + +@smallexample ++------------+------------+--------------------------------------+ +| File | Printer | | ++------------+------------+--------------------------------------+ +| Open | | Select | +| New | | Open | ++----------+ | Connect | + +----------+ +@end smallexample + +To have the strings @code{File}, @code{Printer}, @code{Open}, +@code{New}, @code{Select}, and @code{Connect} translated there has to be +at some point in the code a call to a function of the @code{gettext} +family. But in two places the string passed into the function would be +@code{Open}. The translations might not be the same and therefore we +are in the dilemma described above. + +One solution to this problem is to artificially extend the strings +to make them unambiguous. But what would the program do if no +translation is available? The extended string is not what should be +printed. 
So we should use a slightly modified version of the functions. + +To extend the strings a uniform method should be used. E.g., in the +example above, the strings could be chosen as + +@smallexample +Menu|File +Menu|Printer +Menu|File|Open +Menu|File|New +Menu|Printer|Select +Menu|Printer|Open +Menu|Printer|Connect +@end smallexample + +Now all the strings are different and if now instead of @code{gettext} +the following little wrapper function is used, everything works just +fine: + +@cindex sgettext +@smallexample + char * + sgettext (const char *msgid) + @{ + char *msgval = gettext (msgid); + if (msgval == msgid) + msgval = strrchr (msgid, '|') + 1; + return msgval; + @} +@end smallexample + +What this little function does is to recognize the case when no +translation is available. This can be done very efficiently by a +pointer comparison since the return value is the input value. If there +is no translation we know that the input string is in the format we used +for the Menu entries and therefore contains a @code{|} character. We +simply search for the last occurrence of this character and return a +pointer to the character following it. That's it! + +If one now consistently uses the extended string form and replaces +the @code{gettext} calls with calls to @code{sgettext} (this is normally +limited to very few places in the GUI implementation) then it is +possible to produce a program which can be internationalized. 
+ +With advanced compilers (such as GNU C) one can write the +@code{sgettext} function as an inline function or as a macro like this: + +@cindex sgettext +@smallexample +#define sgettext(msgid) \ + (@{ const char *__msgid = (msgid); \ + char *__msgval = gettext (__msgid); \ + if (__msgval == __msgid) \ + __msgval = strrchr (__msgid, '|') + 1; \ + __msgval; @}) +@end smallexample + +The other @code{gettext} functions (@code{dgettext}, @code{dcgettext} +and the @code{ngettext} equivalents) can and should have corresponding +functions as well which look almost identical, except for the parameters +and the call to the underlying function. + +Now there is of course the question why such functions do not exist in +@theglibc{}? There are two parts of the answer to this question. + +@itemize @bullet +@item +They are easy to write and therefore can be provided by the project they +are used in. This is not an answer by itself and must be seen together +with the second part which is: + +@item +There is no way the C library can contain a version which can work +everywhere. The problem is the selection of the character to separate +the prefix from the actual string in the extended string. The +examples above used @code{|} which is a quite good choice because it +resembles a notation frequently used in this context and it also is a +character not often used in message strings. + +But what if the character is used in message strings? Or if the chosen +character is not available in the character set on the machine one +compiles (e.g., @code{|} is not required to exist for @w{ISO C}; this is +why the @file{iso646.h} file exists in @w{ISO C} programming environments). +@end itemize + +There is only one more comment left to make. The wrapper function above +requires that the translation strings are not extended themselves. +This is only logical.
There is no need to disambiguate the strings +(since they are never used as keys for a search) and one also saves +quite some memory and disk space by doing this. + + +@node Using gettextized software +@subsubsection User influence on @code{gettext} + +The last sections described what the programmer can do to +internationalize the messages of the program. But it is finally up to +the user to select the message s/he wants to see. S/He must understand +them. + +The POSIX locale model uses the environment variables @code{LC_COLLATE}, +@code{LC_CTYPE}, @code{LC_MESSAGES}, @code{LC_MONETARY}, @code{LC_NUMERIC}, +and @code{LC_TIME} to select the locale which is to be used. This way +the user can influence lots of functions. As we mentioned above, the +@code{gettext} functions also take advantage of this. + +To understand how this happens it is necessary to take a look at the +various components of the filename which gets computed to locate a +message catalog. It is composed as follows: + +@smallexample +@var{dir_name}/@var{locale}/LC_@var{category}/@var{domain_name}.mo +@end smallexample + +The default value for @var{dir_name} is system specific. It is computed +from the value given as the prefix while configuring the C library. +This value normally is @file{/usr} or @file{/}. For the former the +complete @var{dir_name} is: + +@smallexample +/usr/share/locale +@end smallexample + +We can use @file{/usr/share} since the @file{.mo} files containing the +message catalogs are system independent, so all systems can use the same +files. If the program executed the @code{bindtextdomain} function for +the message domain that is currently handled, the @code{dir_name} +component is exactly the value which was given to the function as +the second parameter. I.e., @code{bindtextdomain} allows overwriting +the only system dependent and fixed value to make it possible to +address files anywhere in the filesystem. 
+ +The @var{category} is the name of the locale category which was selected +in the program code. For @code{gettext} and @code{dgettext} this is +always @code{LC_MESSAGES}, for @code{dcgettext} this is selected by the +value of the third parameter. As said above it should be avoided to +ever use a category other than @code{LC_MESSAGES}. + +The @var{locale} component is computed based on the category used. Just +like for the @code{setlocale} function here comes the user selection +into the play. Some environment variables are examined in a fixed order +and the first environment variable set determines the return value of +the lookup process. In detail, for the category @code{LC_xxx} the +following variables in this order are examined: + +@table @code +@item LANGUAGE +@item LC_ALL +@item LC_xxx +@item LANG +@end table + +This looks very familiar. With the exception of the @code{LANGUAGE} +environment variable this is exactly the lookup order the +@code{setlocale} function uses. But why introduce the @code{LANGUAGE} +variable? + +The reason is that the syntax of the values these variables can have is +different to what is expected by the @code{setlocale} function. If we +would set @code{LC_ALL} to a value following the extended syntax that +would mean the @code{setlocale} function will never be able to use the +value of this variable as well. An additional variable removes this +problem plus we can select the language independently of the locale +setting which sometimes is useful. + +While for the @code{LC_xxx} variables the value should consist of +exactly one specification of a locale the @code{LANGUAGE} variable's +value can consist of a colon separated list of locale names. The +attentive reader will realize that this is the way we manage to +implement one of our additional demands above: we want to be able to +specify an ordered list of languages. + +Back to the constructed filename we have only one component missing. 
+The @var{domain_name} part is the name which was either registered using +the @code{textdomain} function or which was given to @code{dgettext} or +@code{dcgettext} as the first parameter. Now it becomes obvious that a +good choice for the domain name in the program code is a string which is +closely related to the program/package name. E.g., for @theglibc{} +the domain name is @code{libc}. + +@noindent +A limited piece of example code should show how the program is supposed +to work: + +@smallexample +@{ + setlocale (LC_ALL, ""); + textdomain ("test-package"); + bindtextdomain ("test-package", "/usr/local/share/locale"); + puts (gettext ("Hello, world!")); +@} +@end smallexample + +At the program start the default domain is @code{messages}, and the +default locale is "C". The @code{setlocale} call sets the locale +according to the user's environment variables; remember that correct +functioning of @code{gettext} relies on the correct setting of the +@code{LC_MESSAGES} locale (for looking up the message catalog) and +of the @code{LC_CTYPE} locale (for the character set conversion). +The @code{textdomain} call changes the default domain to +@code{test-package}. The @code{bindtextdomain} call specifies that +the message catalogs for the domain @code{test-package} can be found +below the directory @file{/usr/local/share/locale}. + +If the user sets in her/his environment the variable @code{LANGUAGE} +to @code{de} the @code{gettext} function will try to use the +translations from the file + +@smallexample +/usr/local/share/locale/de/LC_MESSAGES/test-package.mo +@end smallexample + +From the above descriptions it should be clear which component of this +filename is determined by which source. + +In the above example we assumed the @code{LANGUAGE} environment +variable to be @code{de}. This might be an appropriate selection but what +happens if the user wants to use @code{LC_ALL} because of the wider +usability and here the required value is @code{de_DE.ISO-8859-1}? 
We +already mentioned above that a situation like this is not infrequent. +E.g., a person might prefer reading a dialect and if this is not +available fall back on the standard language. + +The @code{gettext} functions know about situations like this and can +handle them gracefully. The functions recognize the format of the value +of the environment variable. They can split the value into different pieces +and by leaving out one or the other part they can construct new +values. This happens of course in a predictable way. To understand +this one must know the format of the environment variable value. There +is one more or less standardized form, originally from the X/Open +specification: + +@code{language[_territory[.codeset]][@@modifier]} + +Less specific locale names will be stripped in the order of the +following list: + +@enumerate +@item +@code{codeset} +@item +@code{normalized codeset} +@item +@code{territory} +@item +@code{modifier} +@end enumerate + +The @code{language} field will never be dropped for obvious reasons. + +The only new thing is the @code{normalized codeset} entry. This is +another goodie which is introduced to help reduce the chaos which +derives from the inability of people to standardize the names of +character sets. Instead of @w{ISO-8859-1} one can often see @w{8859-1}, +@w{88591}, @w{iso8859-1}, or @w{iso_8859-1}. The @code{normalized +codeset} value is generated from the user-provided character set name by +applying the following rules: + +@enumerate +@item +Remove all characters besides numbers and letters. +@item +Fold letters to lowercase. +@item +If the result only contains digits prepend the string @code{"iso"}. +@end enumerate + +@noindent +So all of the above names will be normalized to @code{iso88591}. This +allows the program user much more freedom in choosing the locale name.
+ +Even this extended functionality still does not help to solve the +problem that completely different names can be used to denote the same +locale (e.g., @code{de} and @code{german}). To be of help in this +situation the locale implementation and also the @code{gettext} +functions know about aliases. + +The file @file{/usr/share/locale/locale.alias} (replace @file{/usr} with +whatever prefix you used for configuring the C library) contains a +mapping of alternative names to more regular names. The system manager +is free to add new entries to fill her/his own needs. The selected +locale from the environment is compared with the entries in the first +column of this file ignoring the case. If they match, the value of the +second column is used instead for the further handling. + +In the description of the format of the environment variables we already +mentioned the character set as a factor in the selection of the message +catalog. In fact, only catalogs which contain text written using the +character set of the system/program can be used (directly; there will +come a solution for this some day). This means for the user that s/he +will always have to take care of this. If in the collection of the +message catalogs there are files for the same language but coded using +different character sets the user has to be careful. + + +@node Helper programs for gettext +@subsection Programs to handle message catalogs for @code{gettext} + +@Theglibc{} does not contain the source code for the programs to +handle message catalogs for the @code{gettext} functions. As part of +the GNU project the GNU gettext package contains everything the +developer needs. The functionality provided by the tools in this +package by far exceeds the abilities of the @code{gencat} program +described above for the @code{catgets} functions. + +There is a program @code{msgfmt} which is the equivalent program to the +@code{gencat} program. 
It generates from the human-readable and +-editable form of the message catalog a binary file which can be used by +the @code{gettext} functions. But there are several more programs +available. + +The @code{xgettext} program can be used to automatically extract the +translatable messages from a source file. I.e., the programmer need not +take care of the translations and the list of messages which have to be +translated. S/He will simply wrap the translatable string in calls to +@code{gettext} et al.@: and the rest will be done by @code{xgettext}. This +program has a lot of options which help to customize the output or +help to understand the input better. + +Other programs help to manage the development cycle when new messages appear +in the source files or when a new translation of the messages appears. +Here it should only be noted that using all the tools in GNU gettext it +is possible to @emph{completely} automate the handling of message +catalogs. Besides marking the translatable strings in the source code and +generating the translations the developers do not have anything to do +themselves. diff --git a/REORG.TODO/manual/nss.texi b/REORG.TODO/manual/nss.texi new file mode 100644 index 0000000000..ee70ad309d --- /dev/null +++ b/REORG.TODO/manual/nss.texi @@ -0,0 +1,734 @@ +@node Name Service Switch, Users and Groups, Job Control, Top +@chapter System Databases and Name Service Switch +@c %MENU% Accessing system databases +@cindex Name Service Switch +@cindex NSS +@cindex databases + +Various functions in the C Library need to be configured to work +correctly in the local environment. Traditionally, this was done by +using files (e.g., @file{/etc/passwd}), but other nameservices (like the +Network Information Service (NIS) and the Domain Name Service (DNS)) +became popular, and were hacked into the C library, usually with a fixed +search order. + +@Theglibc{} contains a cleaner solution to this problem.
It is +designed after a method used by Sun Microsystems in the C library of +@w{Solaris 2}. @Theglibc{} follows their name and calls this +scheme @dfn{Name Service Switch} (NSS). + +Though the interface might be similar to Sun's version there is no +common code. We never saw any source code of Sun's implementation and +so the internal interface is incompatible. This also manifests in the +file names we use as we will see later. + + +@menu +* NSS Basics:: What is this NSS good for. +* NSS Configuration File:: Configuring NSS. +* NSS Module Internals:: How does it work internally. +* Extending NSS:: What to do to add services or databases. +@end menu + +@node NSS Basics, NSS Configuration File, Name Service Switch, Name Service Switch +@section NSS Basics + +The basic idea is to put the implementation of the different services +offered to access the databases in separate modules. This has some +advantages: + +@enumerate +@item +Contributors can add new services without adding them to @theglibc{}. +@item +The modules can be updated separately. +@item +The C library image is smaller. +@end enumerate + +To fulfill the first goal above, the ABI of the modules will be described +below. For getting the implementation of a new service right it is +important to understand how the functions in the modules get called. +They are in no way designed to be used by the programmer directly. +Instead the programmer should only use the documented and standardized +functions to access the databases. + +@noindent +The databases available in the NSS are + +@cindex ethers +@cindex group +@cindex hosts +@cindex netgroup +@cindex networks +@cindex protocols +@cindex passwd +@cindex rpc +@cindex services +@cindex shadow +@table @code +@item aliases +Mail aliases +@comment @pxref{Mail Aliases}. +@item ethers +Ethernet numbers, +@comment @pxref{Ethernet Numbers}. +@item group +Groups of users, @pxref{Group Database}. +@item hosts +Host names and numbers, @pxref{Host Names}. 
+@item netgroup +Network wide list of hosts and users, @pxref{Netgroup Database}. +@item networks +Network names and numbers, @pxref{Networks Database}. +@item protocols +Network protocols, @pxref{Protocols Database}. +@item passwd +User passwords, @pxref{User Database}. +@item rpc +Remote procedure call names and numbers, +@comment @pxref{RPC Database}. +@item services +Network services, @pxref{Services Database}. +@item shadow +Shadow user passwords, +@comment @pxref{Shadow Password Database}. +@end table + +@noindent +There will be some more added later (@code{automount}, @code{bootparams}, +@code{netmasks}, and @code{publickey}). + +@node NSS Configuration File, NSS Module Internals, NSS Basics, Name Service Switch +@section The NSS Configuration File + +@cindex @file{/etc/nsswitch.conf} +@cindex @file{nsswitch.conf} +Somehow the NSS code must be told about the wishes of the user. For +this reason there is the file @file{/etc/nsswitch.conf}. For each +database, this file contains a specification of how the lookup process should +work. The file could look like this: + +@example +@include nsswitch.texi +@end example + +The first column is the database as you can guess from the table above. +The rest of the line specifies how the lookup process works. Please +note that you specify the way it works for each database individually. +This cannot be done with the old way of a monolithic implementation. + +The configuration specification for each database can contain two +different items: + +@itemize @bullet +@item +the service specification like @code{files}, @code{db}, or @code{nis}. +@item +the reaction to the lookup result like @code{[NOTFOUND=return]}. +@end itemize + +@menu +* Services in the NSS configuration:: Service names in the NSS configuration. +* Actions in the NSS configuration:: React appropriately to the lookup result. +* Notes on NSS Configuration File:: Things to take care about while + configuring NSS.
+@end menu + +@node Services in the NSS configuration, Actions in the NSS configuration, NSS Configuration File, NSS Configuration File +@subsection Services in the NSS configuration File + +The above example file mentions five different services: @code{files}, +@code{db}, @code{dns}, @code{nis}, and @code{nisplus}. This does not +mean these +services are available on all sites and neither does it mean these are +all the services which will ever be available. + +In fact, these names are simply strings which the NSS code uses to find +the implicitly addressed functions. The internal interface will be +described later. Visible to the user are the modules which implement an +individual service. + +Assume the service @var{name} shall be used for a lookup. The code for +this service is implemented in a module called @file{libnss_@var{name}}. +On a system supporting shared libraries this is in fact a shared library +with the name (for example) @file{libnss_@var{name}.so.2}. The number +at the end is the currently used version of the interface which will not +change frequently. Normally the user should not have to be cognizant of +these files since they should be placed in a directory where they are +found automatically. Only the names of all available services are +important. + +@node Actions in the NSS configuration, Notes on NSS Configuration File, Services in the NSS configuration, NSS Configuration File +@subsection Actions in the NSS configuration + +The second item in the specification gives the user much finer control +on the lookup process. Action items are placed between two service +names and are written within brackets. The general form is + +@display +@code{[} ( @code{!}? @var{status} @code{=} @var{action} )+ @code{]} +@end display + +@noindent +where + +@smallexample +@var{status} @result{} success | notfound | unavail | tryagain +@var{action} @result{} return | continue +@end smallexample + +The case of the keywords is insignificant. 
The @var{status} +values are the results of a call to a lookup function of a specific +service. They mean: + +@ftable @samp +@item success +No error occurred and the wanted entry is returned. The default action +for this is @code{return}. + +@item notfound +The lookup process works ok but the needed value was not found. The +default action is @code{continue}. + +@item unavail +@cindex DNS server unavailable +The service is permanently unavailable. This can either mean the needed +file is not available, or, for DNS, the server is not available or does +not allow queries. The default action is @code{continue}. + +@item tryagain +The service is temporarily unavailable. This could mean a file is +locked or a server currently cannot accept more connections. The +default action is @code{continue}. +@end ftable + +@noindent +The @var{action} values mean: + +@ftable @samp +@item return + +If the status matches, stop the lookup process at this service +specification. If an entry is available, provide it to the application. +If an error occurred, report it to the application. In case of a prior +@samp{merge} action, the data is combined with previous lookup results, +as explained below. + +@item continue + +If the status matches, proceed with the lookup process at the next +entry, discarding the result of the current lookup (and any merged +data). An exception is the @samp{initgroups} database and the +@samp{success} status, where @samp{continue} acts like @code{merge} +below. + +@item merge + +Proceed with the lookup process, retaining the current lookup result. +This action is useful only with the @samp{success} status. If a +subsequent service lookup succeeds and has a matching @samp{return} +specification, the results are merged, the lookup process ends, and the +merged results are returned to the application. If the following service +has a matching @samp{merge} action, the lookup process continues, +retaining the combined data from this and any previous lookups. 
+ +After a @code{merge} action, errors from subsequent lookups are ignored, +and the data gathered so far will be returned. + +The @samp{merge} only applies to the @samp{success} status. It is +currently implemented for the @samp{group} database and its group +members field, @samp{gr_mem}. If specified for other databases, it +causes the lookup to fail (if the @var{status} matches). + +When processing @samp{merge} for @samp{group} membership, the group GID +and name must be identical for both entries. If only one or the other is +a match, the behavior is undefined. + +@end ftable + +@noindent +If we have a line like + +@smallexample +ethers: nisplus [NOTFOUND=return] db files +@end smallexample + +@noindent +this is equivalent to + +@smallexample +ethers: nisplus [SUCCESS=return NOTFOUND=return UNAVAIL=continue + TRYAGAIN=continue] + db [SUCCESS=return NOTFOUND=continue UNAVAIL=continue + TRYAGAIN=continue] + files +@end smallexample + +@noindent +(except that it would have to be written on one line). The default +values for the actions are normally what you want, and only need to be +changed in exceptional cases. + +If the optional @code{!} is placed before the @var{status} this means +the following action is used for all statuses but @var{status} itself. +I.e., @code{!} is negation as in the C language (and others). + +Before we explain the exception which makes this action item necessary +one more remark: obviously it makes no sense to add another action +item after the @code{files} service. Since there is no other service +following, the action @emph{always} is @code{return}. + +@cindex nisplus, and completeness +Now, why is this @code{[NOTFOUND=return]} action useful? To understand +this we should know that the @code{nisplus} service is often +complete; i.e., if an entry is not available in the NIS+ tables it is +not available anywhere else.
This is what is expressed by this action +item: it is useless to examine further services since they will not give +us a result. + +@cindex nisplus, and booting +@cindex bootstrapping, and services +The situation would be different if the NIS+ service is not available +because the machine is booting. In this case the return value of the +lookup function is not @code{notfound} but instead @code{unavail}. And +as you can see in the complete form above: in this situation the +@code{db} and @code{files} services are used. Neat, isn't it? The +system administrator need not pay special care for the time the system +is not completely ready to work (while booting or shutdown or +network problems). + + +@node Notes on NSS Configuration File, , Actions in the NSS configuration, NSS Configuration File +@subsection Notes on the NSS Configuration File + +Finally a few more hints. The NSS implementation is not completely +helpless if @file{/etc/nsswitch.conf} does not exist. For +all supported databases there is a default value so it should normally +be possible to get the system running even if the file is corrupted or +missing. + +@cindex default value, and NSS +For the @code{hosts} and @code{networks} databases the default value is +@code{dns [!UNAVAIL=return] files}. I.e., the system is prepared for +the DNS service not to be available but if it is available the answer it +returns is definitive. + +The @code{passwd}, @code{group}, and @code{shadow} databases are +traditionally handled in a special way. The appropriate files in the +@file{/etc} directory are read but if an entry with a name starting +with a @code{+} character is found NIS is used. This kind of lookup +remains possible by using the special lookup service @code{compat} +and the default value for the three databases above is +@code{compat [NOTFOUND=return] files}. + +For all other databases the default value is +@code{nis [NOTFOUND=return] files}. 
This solution gives the best +chance to be correct since NIS and file based lookups are used. + +@cindex optimizing NSS +A second point is that the user should try to optimize the lookup +process. The different services have different response times. +A simple file lookup on a local file could be fast, but if the file +is long and the needed entry is near the end of the file this may take +quite some time. In this case it might be better to use the @code{db} +service which allows fast local access to large data sets. + +Often the situation is that some global information like NIS must be +used. So it is unavoidable to use service entries like @code{nis} etc. +But one should avoid slow services like this if possible. + + +@node NSS Module Internals, Extending NSS, NSS Configuration File, Name Service Switch +@section NSS Module Internals + +Now it is time to describe what the modules look like. The functions +contained in a module are identified by their names. I.e., there is no +jump table or the like. How this is done is of no interest here; those +interested in this topic should read about Dynamic Linking. +@comment @ref{Dynamic Linking}. + + +@menu +* NSS Module Names:: Construction of the interface function of + the NSS modules. +* NSS Modules Interface:: Programming interface in the NSS module + functions. +@end menu + +@node NSS Module Names, NSS Modules Interface, NSS Module Internals, NSS Module Internals +@subsection The Naming Scheme of the NSS Modules + +@noindent +The name of each function consists of various parts: + +@quotation + _nss_@var{service}_@var{function} +@end quotation + +@var{service} of course corresponds to the name of the module this +function is found in.@footnote{Now you might ask why this information is +duplicated. The answer is that we want to make it possible to link +directly with these shared objects.} The @var{function} part is derived +from the interface function in the C library itself.
If the user calls +the function @code{gethostbyname} and the service used is @code{files} +the function + +@smallexample + _nss_files_gethostbyname_r +@end smallexample + +@noindent +in the module + +@smallexample + libnss_files.so.2 +@end smallexample + +@noindent +@cindex reentrant NSS functions +is used. You see, what is explained above is not the whole truth. In +fact the NSS modules only contain reentrant versions of the lookup +functions. I.e., if the user would call the @code{gethostbyname_r} +function this also would end in the above function. For all user +interface functions the C library maps this call to a call to the +reentrant function. For reentrant functions this is trivial since the +interface is (nearly) the same. For the non-reentrant version the +library keeps internal buffers which are used to replace the user +supplied buffer. + +I.e., the reentrant functions @emph{can} have counterparts. No service +module is forced to have functions for all databases and all kinds to +access them. If a function is not available it is simply treated as if +the function would return @code{unavail} +(@pxref{Actions in the NSS configuration}). + +The file name @file{libnss_files.so.2} would be on a @w{Solaris 2} +system @file{nss_files.so.2}. This is the difference mentioned above. +Sun's NSS modules are usable as modules which get indirectly loaded +only. + +The NSS modules in @theglibc{} are prepared to be used as normal +libraries themselves. This is @emph{not} true at the moment, though. +However, the organization of the name space in the modules does not make it +impossible like it is for Solaris.
Now you can see why the modules are +still libraries.@footnote{There is a second explanation: we were too +lazy to change the Makefiles to allow the generation of shared objects +not starting with @file{lib} but don't tell this to anybody.} + + +@node NSS Modules Interface, , NSS Module Names, NSS Module Internals +@subsection The Interface of the Function in NSS Modules + +Now we know about the functions contained in the modules. It is now +time to describe the types. When we mentioned the reentrant versions of +the functions above, this means there are some additional arguments +(compared with the standard, non-reentrant versions). The prototypes for +the non-reentrant and reentrant versions of our function above are: + +@smallexample +struct hostent *gethostbyname (const char *name) + +int gethostbyname_r (const char *name, struct hostent *result_buf, + char *buf, size_t buflen, struct hostent **result, + int *h_errnop) +@end smallexample + +@noindent +The actual prototype of the function in the NSS modules in this case is + +@smallexample +enum nss_status _nss_files_gethostbyname_r (const char *name, + struct hostent *result_buf, + char *buf, size_t buflen, + int *errnop, int *h_errnop) +@end smallexample + +I.e., the interface function is in fact the reentrant function with the +change of the return value, the omission of the @var{result} parameter, +and the addition of the @var{errnop} parameter. While the user-level +function returns a pointer to the result the reentrant function returns +an @code{enum nss_status} value: + +@vtable @code +@item NSS_STATUS_TRYAGAIN +numeric value @code{-2} + +@item NSS_STATUS_UNAVAIL +numeric value @code{-1} + +@item NSS_STATUS_NOTFOUND +numeric value @code{0} + +@item NSS_STATUS_SUCCESS +numeric value @code{1} +@end vtable + +@noindent +Now you see where the action items of the @file{/etc/nsswitch.conf} file +are used. + +If you study the source code you will find there is a fifth value: +@code{NSS_STATUS_RETURN}.
This is an internal use only value, used by a +few functions in places where none of the above values can be used. If +necessary the source code should be examined to learn about the details. + +In case the interface function has to return an error it is important +that the correct error code is stored in @code{*@var{errnop}}. Some +return status values have only one associated error code, others have +more. + +@multitable @columnfractions .3 .2 .50 +@item +@code{NSS_STATUS_TRYAGAIN} @tab + @code{EAGAIN} @tab One of the functions used ran temporarily out of +resources or a service is currently not available. +@item +@tab + @code{ERANGE} @tab The provided buffer is not large enough. +The function should be called again with a larger buffer. +@item +@code{NSS_STATUS_UNAVAIL} @tab + @code{ENOENT} @tab A necessary input file cannot be found. +@item +@code{NSS_STATUS_NOTFOUND} @tab + @code{ENOENT} @tab The requested entry is not available. + +@item +@code{NSS_STATUS_NOTFOUND} @tab + @code{SUCCESS} @tab There are no entries. +Use this to avoid returning errors for inactive services which may +be enabled at a later time. This is not the same as the service +being temporarily unavailable. +@end multitable + +These are proposed values. There can be other error codes and the +described error codes can have different meanings. @strong{With one +exception:} when returning @code{NSS_STATUS_TRYAGAIN} the error code +@code{ERANGE} @emph{must} mean that the user provided buffer is too +small. Everything else is non-critical. + +In statically linked programs, the main application and NSS modules do +not share the same thread-local variable @code{errno}, which is the +reason why there is an explicit @var{errnop} function argument. + +The above function has something special which is missing for almost all +the other module functions. There is an argument @var{h_errnop}.
This +points to a variable which will be filled with the error code in case +the execution of the function fails for some reason. (In statically +linked programs, the thread-local variable @code{h_errno} is not shared +with the main application.) + +The @code{get@var{XXX}by@var{YYY}} functions are the most important +functions in the NSS modules. But there are others which implement +the other ways to access system databases (say for the +password database, there are @code{setpwent}, @code{getpwent}, and +@code{endpwent}). These will be described in more detail later. +Here we give a general way to determine the +signature of the module function: + +@itemize @bullet +@item +the return value is @code{enum nss_status}; +@item +the name (@pxref{NSS Module Names}); +@item +the first arguments are identical to the arguments of the non-reentrant +function; +@item +the next four arguments are: + +@table @code +@item STRUCT_TYPE *result_buf +pointer to buffer where the result is stored. @code{STRUCT_TYPE} is +normally a struct which corresponds to the database. +@item char *buffer +pointer to a buffer where the function can store additional data for +the result etc. +@item size_t buflen +length of the buffer pointed to by @var{buffer}. +@item int *errnop +the low-level error code to return to the application. If the return +value is not @code{NSS_STATUS_SUCCESS}, @code{*@var{errnop}} needs to be +set to a non-zero value. An NSS module should never set +@code{*@var{errnop}} to zero. The value @code{ERANGE} is special, as +described above. +@end table + +@item +possibly a last argument @var{h_errnop}, for the host name and network +name lookup functions. If the return value is not +@code{NSS_STATUS_SUCCESS}, @code{*@var{h_errnop}} needs to be set to a +non-zero value. A generic error code is @code{NETDB_INTERNAL}, which +instructs the caller to examine @code{*@var{errnop}} for further +details. (This includes the @code{ERANGE} special case.) 
+@end itemize + +@noindent +This table is correct for all functions but the @code{set@dots{}ent} +and @code{end@dots{}ent} functions. + + +@node Extending NSS, , NSS Module Internals, Name Service Switch +@section Extending NSS + +One of the advantages of NSS mentioned above is that it can be extended +quite easily. There are two ways in which the extension can happen: +adding another database or adding another service. The former is +normally done only by the C library developers. It is +here only important to remember that adding another database is +independent from adding another service because a service need not +support all databases or lookup functions. + +A designer/implementer of a new service is therefore free to choose the +databases s/he is interested in and leave the rest for later (or +completely aside). + +@menu +* Adding another Service to NSS:: What is to do to add a new service. +* NSS Module Function Internals:: Guidelines for writing new NSS + service functions. +@end menu + +@node Adding another Service to NSS, NSS Module Function Internals, Extending NSS, Extending NSS +@subsection Adding another Service to NSS + +The sources for a new service need not (and should not) be part of @theglibc{} +itself. The developer retains complete control over the +sources and their development. The links between the C library and the +new service module consist solely of the interface functions. + +Each module is designed following a specific interface specification. +For now the version is 2 (the interface in version 1 was not adequate) +and this manifests in the version number of the shared library object of +the NSS modules: they have the extension @code{.2}. If the interface +changes again in an incompatible way, this number will be increased. +Modules using the old interface will still be usable. + +Developers of a new service will have to make sure that their module is +created using the correct interface number.
This means the file itself +must have the correct name and on ELF systems the @dfn{soname} (Shared +Object Name) must also have this number. Building a module from a bunch +of object files on an ELF system using GNU CC could be done like this: + +@smallexample +gcc -shared -o libnss_NAME.so.2 -Wl,-soname,libnss_NAME.so.2 OBJECTS +@end smallexample + +@noindent +@ref{Link Options, Options for Linking, , gcc, GNU CC}, to learn +more about this command line. + +To use the new module the library must be able to find it. This can be +achieved by using options for the dynamic linker so that it will search +the directory where the binary is placed. For an ELF system this could be +done by adding the wanted directory to the value of +@code{LD_LIBRARY_PATH}. + +But this is not always possible since some programs (those which run +under IDs which do not belong to the user) ignore this variable. +Therefore the stable version of the module should be placed into a +directory which is searched by the dynamic linker. Normally this should +be the directory @file{$prefix/lib}, where @file{$prefix} corresponds to +the value given to configure using the @code{--prefix} option. But be +careful: this should only be done if it is clear the module does not +cause any harm. System administrators should be careful. + + +@node NSS Module Function Internals, , Adding another Service to NSS, Extending NSS +@subsection Internals of the NSS Module Functions + +Until now we only provided the syntactic interface for the functions in +the NSS module. In fact there is not much more we can say since the +implementation obviously is different for each function. But a few +general rules must be followed by all functions. + +In fact there are four kinds of different functions which may appear in +the interface. All derive from the traditional ones for system databases. +@var{db} in the following table is normally an abbreviation for the +database (e.g., it is @code{pw} for the password database). 
+ +@table @code +@item enum nss_status _nss_@var{database}_set@var{db}ent (void) +This function prepares the service for following operations. For a +simple file based lookup this means files could be opened, for other +services this function simply is a noop. + +One special case for this function is that it takes an additional +argument for some @var{database}s (i.e., the interface is +@code{int set@var{db}ent (int)}). @ref{Host Names}, which describes the +@code{sethostent} function. + +The return value should be @var{NSS_STATUS_SUCCESS} or according to the +table above in case of an error (@pxref{NSS Modules Interface}). + +@item enum nss_status _nss_@var{database}_end@var{db}ent (void) +This function simply closes all files which are still open or removes +buffer caches. If there are no files or buffers to remove this is again +a simple noop. + +There normally is no return value other than @var{NSS_STATUS_SUCCESS}. + +@item enum nss_status _nss_@var{database}_get@var{db}ent_r (@var{STRUCTURE} *result, char *buffer, size_t buflen, int *errnop) +Since this function will be called several times in a row to retrieve +one entry after the other it must keep some kind of state. But this +also means the functions are not really reentrant. They are reentrant +only in that simultaneous calls to this function will not try to +write the retrieved data in the same place (as it would be the case for +the non-reentrant functions); instead, it writes to the structure +pointed to by the @var{result} parameter. But the calls share a common +state and in the case of a file access this means they return neighboring +entries in the file. + +The buffer of length @var{buflen} pointed to by @var{buffer} can be used +for storing some additional data for the result. It is @emph{not} +guaranteed that the same buffer will be passed for the next call of this +function. Therefore one must not misuse this buffer to save some state +information from one call to another. 
+ +Before the function returns with a failure code, the implementation +should store the value of the local @var{errno} variable in the variable +pointed to by @var{errnop}. This is important to guarantee the module +works in statically linked programs. The stored value must not be +zero. + +As explained above this function could also have an additional last +argument. This depends on the database used; it happens only for +@code{host} and @code{networks}. + +The function shall return @code{NSS_STATUS_SUCCESS} as long as there are +more entries. When the last entry was read it should return +@code{NSS_STATUS_NOTFOUND}. When the buffer given as an argument is too +small for the data to be returned @code{NSS_STATUS_TRYAGAIN} should be +returned. When the service was not previously initialized by a call to +@code{_nss_@var{DATABASE}_set@var{db}ent} all return values allowed for +this function can also be returned here. + +@item enum nss_status _nss_@var{DATABASE}_get@var{db}by@var{XX}_r (@var{PARAMS}, @var{STRUCTURE} *result, char *buffer, size_t buflen, int *errnop) +This function shall return the entry from the database which is +addressed by the @var{PARAMS}. The type and number of these arguments +vary. It must be individually determined by looking at the user-level +interface functions. All arguments given to the non-reentrant version +are here described by @var{PARAMS}. + +The result must be stored in the structure pointed to by @var{result}. +If there are additional data to return (say strings, where the +@var{result} structure only contains pointers) the function must use the +@var{buffer} of length @var{buflen}. There must not be any references +to non-constant global data. + +The implementation of this function should honor the @var{stayopen} +flag set by the @code{set@var{DB}ent} function whenever this makes sense.
+ +Before the function returns, the implementation should store the value of +the local @var{errno} variable in the variable pointed to by +@var{errnop}. This is important to guarantee the module works in +statically linked programs. + +Again, this function takes an additional last argument for the +@code{host} and @code{networks} database. + +The return value should as always follow the rules given above +(@pxref{NSS Modules Interface}). + +@end table diff --git a/REORG.TODO/manual/nsswitch.texi b/REORG.TODO/manual/nsswitch.texi new file mode 100644 index 0000000000..62e7f607d1 --- /dev/null +++ b/REORG.TODO/manual/nsswitch.texi @@ -0,0 +1,16 @@ +# /etc/nsswitch.conf +# +# Name Service Switch configuration file. +# + +passwd: db files nis +shadow: files +group: db files nis + +hosts: files nisplus nis dns +networks: nisplus [NOTFOUND=return] files + +ethers: nisplus [NOTFOUND=return] db files +protocols: nisplus [NOTFOUND=return] db files +rpc: nisplus [NOTFOUND=return] db files +services: nisplus [NOTFOUND=return] db files diff --git a/REORG.TODO/manual/pattern.texi b/REORG.TODO/manual/pattern.texi new file mode 100644 index 0000000000..069a6a23ea --- /dev/null +++ b/REORG.TODO/manual/pattern.texi @@ -0,0 +1,2311 @@ +@node Pattern Matching, I/O Overview, Searching and Sorting, Top +@c %MENU% Matching shell ``globs'' and regular expressions +@chapter Pattern Matching + +@Theglibc{} provides pattern matching facilities for two kinds of +patterns: regular expressions and file-name wildcards. The library also +provides a facility for expanding variable and command references and +parsing text into words in the way the shell does. + +@menu +* Wildcard Matching:: Matching a wildcard pattern against a single string. +* Globbing:: Finding the files that match a wildcard pattern. +* Regular Expressions:: Matching regular expressions against strings. +* Word Expansion:: Expanding shell variables, nested commands, + arithmetic, and wildcards. 
+ This is what the shell does with shell commands. +@end menu + +@node Wildcard Matching +@section Wildcard Matching + +@pindex fnmatch.h +This section describes how to match a wildcard pattern against a +particular string. The result is a yes or no answer: does the +string fit the pattern or not. The symbols described here are all +declared in @file{fnmatch.h}. + +@comment fnmatch.h +@comment POSIX.2 +@deftypefun int fnmatch (const char *@var{pattern}, const char *@var{string}, int @var{flags}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c fnmatch @mtsenv @mtslocale @ascuheap @acsmem +@c strnlen dup ok +@c mbsrtowcs +@c memset dup ok +@c malloc dup @ascuheap @acsmem +@c mbsinit dup ok +@c free dup @ascuheap @acsmem +@c FCT = internal_fnwmatch @mtsenv @mtslocale @ascuheap @acsmem +@c FOLD @mtslocale +@c towlower @mtslocale +@c EXT @mtsenv @mtslocale @ascuheap @acsmem +@c STRLEN = wcslen dup ok +@c getenv @mtsenv +@c malloc dup @ascuheap @acsmem +@c MEMPCPY = wmempcpy dup ok +@c FCT dup @mtsenv @mtslocale @ascuheap @acsmem +@c STRCAT = wcscat dup ok +@c free dup @ascuheap @acsmem +@c END @mtsenv +@c getenv @mtsenv +@c MEMCHR = wmemchr dup ok +@c getenv @mtsenv +@c IS_CHAR_CLASS = is_char_class @mtslocale +@c wctype @mtslocale +@c BTOWC ok +@c ISWCTYPE ok +@c auto findidx dup ok +@c elem_hash dup ok +@c memcmp dup ok +@c collseq_table_lookup dup ok +@c NO_LEADING_PERIOD ok +This function tests whether the string @var{string} matches the pattern +@var{pattern}. It returns @code{0} if they do match; otherwise, it +returns the nonzero value @code{FNM_NOMATCH}. The arguments +@var{pattern} and @var{string} are both strings. + +The argument @var{flags} is a combination of flag bits that alter the +details of matching. See below for a list of the defined flags. + +In @theglibc{}, @code{fnmatch} might sometimes report ``errors'' by +returning nonzero values that are not equal to @code{FNM_NOMATCH}. 
+@end deftypefun + +These are the available flags for the @var{flags} argument: + +@vtable @code +@comment fnmatch.h +@comment GNU +@item FNM_FILE_NAME +Treat the @samp{/} character specially, for matching file names. If +this flag is set, wildcard constructs in @var{pattern} cannot match +@samp{/} in @var{string}. Thus, the only way to match @samp{/} is with +an explicit @samp{/} in @var{pattern}. + +@comment fnmatch.h +@comment POSIX.2 +@item FNM_PATHNAME +This is an alias for @code{FNM_FILE_NAME}; it comes from POSIX.2. We +don't recommend this name because we don't use the term ``pathname'' for +file names. + +@comment fnmatch.h +@comment POSIX.2 +@item FNM_PERIOD +Treat the @samp{.} character specially if it appears at the beginning of +@var{string}. If this flag is set, wildcard constructs in @var{pattern} +cannot match @samp{.} as the first character of @var{string}. + +If you set both @code{FNM_PERIOD} and @code{FNM_FILE_NAME}, then the +special treatment applies to @samp{.} following @samp{/} as well as to +@samp{.} at the beginning of @var{string}. (The shell uses the +@code{FNM_PERIOD} and @code{FNM_FILE_NAME} flags together for matching +file names.) + +@comment fnmatch.h +@comment POSIX.2 +@item FNM_NOESCAPE +Don't treat the @samp{\} character specially in patterns. Normally, +@samp{\} quotes the following character, turning off its special meaning +(if any) so that it matches only itself. When quoting is enabled, the +pattern @samp{\?} matches only the string @samp{?}, because the question +mark in the pattern acts like an ordinary character. + +If you use @code{FNM_NOESCAPE}, then @samp{\} is an ordinary character. + +@comment fnmatch.h +@comment GNU +@item FNM_LEADING_DIR +Ignore a trailing sequence of characters starting with a @samp{/} in +@var{string}; that is to say, test whether @var{string} starts with a +directory name that @var{pattern} matches. 
+ +If this flag is set, either @samp{foo*} or @samp{foobar} as a pattern +would match the string @samp{foobar/frobozz}. + +@comment fnmatch.h +@comment GNU +@item FNM_CASEFOLD +Ignore case in comparing @var{string} to @var{pattern}. + +@comment fnmatch.h +@comment GNU +@item FNM_EXTMATCH +@cindex Korn Shell +@pindex ksh +Besides the normal patterns, also recognize the extended patterns +introduced in @file{ksh}. The patterns are written in the form +explained in the following table where @var{pattern-list} is a @code{|} +separated list of patterns. + +@table @code +@item ?(@var{pattern-list}) +The pattern matches if zero or one occurrences of any of the patterns +in the @var{pattern-list} allow matching the input string. + +@item *(@var{pattern-list}) +The pattern matches if zero or more occurrences of any of the patterns +in the @var{pattern-list} allow matching the input string. + +@item +(@var{pattern-list}) +The pattern matches if one or more occurrences of any of the patterns +in the @var{pattern-list} allow matching the input string. + +@item @@(@var{pattern-list}) +The pattern matches if exactly one occurrence of any of the patterns in +the @var{pattern-list} allows matching the input string. + +@item !(@var{pattern-list}) +The pattern matches if the input string cannot be matched with any of +the patterns in the @var{pattern-list}. +@end table +@end vtable + +@node Globbing +@section Globbing + +@cindex globbing +The archetypal use of wildcards is for matching against the files in a +directory, and making a list of all the matches. This is called +@dfn{globbing}. + +You could do this using @code{fnmatch}, by reading the directory entries +one by one and testing each one with @code{fnmatch}. But that would be +slow (and complex, since you would have to handle subdirectories by +hand). + +The library provides a function @code{glob} to make this particular use +of wildcards convenient. 
@code{glob} and the other symbols in this +section are declared in @file{glob.h}. + +@menu +* Calling Glob:: Basic use of @code{glob}. +* Flags for Globbing:: Flags that enable various options in @code{glob}. +* More Flags for Globbing:: GNU specific extensions to @code{glob}. +@end menu + +@node Calling Glob +@subsection Calling @code{glob} + +The result of globbing is a vector of file names (strings). To return +this vector, @code{glob} uses a special data type, @code{glob_t}, which +is a structure. You pass @code{glob} the address of the structure, and +it fills in the structure's fields to tell you about the results. + +@comment glob.h +@comment POSIX.2 +@deftp {Data Type} glob_t +This data type holds a pointer to a word vector. More precisely, it +records both the address of the word vector and its size. The GNU +implementation contains some more fields which are non-standard +extensions. + +@table @code +@item gl_pathc +The number of elements in the vector, excluding the initial null entries +if the GLOB_DOOFFS flag is used (see gl_offs below). + +@item gl_pathv +The address of the vector. This field has type @w{@code{char **}}. + +@item gl_offs +The offset of the first real element of the vector, from its nominal +address in the @code{gl_pathv} field. Unlike the other fields, this +is always an input to @code{glob}, rather than an output from it. + +If you use a nonzero offset, then that many elements at the beginning of +the vector are left empty. (The @code{glob} function fills them with +null pointers.) + +The @code{gl_offs} field is meaningful only if you use the +@code{GLOB_DOOFFS} flag. Otherwise, the offset is always zero +regardless of what is in this field, and the first real element comes at +the beginning of the vector. + +@item gl_closedir +The address of an alternative implementation of the @code{closedir} +function. It is used if the @code{GLOB_ALTDIRFUNC} bit is set in +the flag parameter. 
The type of this field is +@w{@code{void (*) (void *)}}. + +This is a GNU extension. + +@item gl_readdir +The address of an alternative implementation of the @code{readdir} +function used to read the contents of a directory. It is used if the +@code{GLOB_ALTDIRFUNC} bit is set in the flag parameter. The type of +this field is @w{@code{struct dirent *(*) (void *)}}. + +An implementation of @code{gl_readdir} needs to initialize the following +members of the @code{struct dirent} object: + +@table @code +@item d_type +This member should be set to the file type of the entry if it is known. +Otherwise, the value @code{DT_UNKNOWN} can be used. The @code{glob} +function may use the specified file type to avoid callbacks in cases +where the file type indicates that the data is not required. + +@item d_ino +This member needs to be non-zero, otherwise @code{glob} may skip the +current entry and call the @code{gl_readdir} callback function again to +retrieve another entry. + +@item d_name +This member must be set to the name of the entry. It must be +null-terminated. +@end table + +The example below shows how to allocate a @code{struct dirent} object +containing a given name. + +@smallexample +@include mkdirent.c.texi +@end smallexample + +The @code{glob} function reads the @code{struct dirent} members listed +above and makes a copy of the file name in the @code{d_name} member +immediately after the @code{gl_readdir} callback function returns. +Future invocations of any of the callback functions may deallocate or +reuse the buffer. It is the responsibility of the caller of the +@code{glob} function to allocate and deallocate the buffer, around the +call to @code{glob} or using the callback functions. For example, an +application could allocate the buffer in the @code{gl_readdir} callback +function, and deallocate it in the @code{gl_closedir} callback function. + +The @code{gl_readdir} member is a GNU extension.
+ +@item gl_opendir +The address of an alternative implementation of the @code{opendir} +function. It is used if the @code{GLOB_ALTDIRFUNC} bit is set in +the flag parameter. The type of this field is +@w{@code{void *(*) (const char *)}}. + +This is a GNU extension. + +@item gl_stat +The address of an alternative implementation of the @code{stat} function +to get information about an object in the filesystem. It is used if the +@code{GLOB_ALTDIRFUNC} bit is set in the flag parameter. The type of +this field is @w{@code{int (*) (const char *, struct stat *)}}. + +This is a GNU extension. + +@item gl_lstat +The address of an alternative implementation of the @code{lstat} +function to get information about an object in the filesystems, not +following symbolic links. It is used if the @code{GLOB_ALTDIRFUNC} bit +is set in the flag parameter. The type of this field is @code{@w{int +(*) (const char *,} @w{struct stat *)}}. + +This is a GNU extension. + +@item gl_flags +The flags used when @code{glob} was called. In addition, @code{GLOB_MAGCHAR} +might be set. See @ref{Flags for Globbing} for more details. + +This is a GNU extension. +@end table +@end deftp + +For use in the @code{glob64} function @file{glob.h} contains another +definition for a very similar type. @code{glob64_t} differs from +@code{glob_t} only in the types of the members @code{gl_readdir}, +@code{gl_stat}, and @code{gl_lstat}. + +@comment glob.h +@comment GNU +@deftp {Data Type} glob64_t +This data type holds a pointer to a word vector. More precisely, it +records both the address of the word vector and its size. The GNU +implementation contains some more fields which are non-standard +extensions. + +@table @code +@item gl_pathc +The number of elements in the vector, excluding the initial null entries +if the GLOB_DOOFFS flag is used (see gl_offs below). + +@item gl_pathv +The address of the vector. This field has type @w{@code{char **}}. 
+ +@item gl_offs +The offset of the first real element of the vector, from its nominal +address in the @code{gl_pathv} field. Unlike the other fields, this +is always an input to @code{glob}, rather than an output from it. + +If you use a nonzero offset, then that many elements at the beginning of +the vector are left empty. (The @code{glob} function fills them with +null pointers.) + +The @code{gl_offs} field is meaningful only if you use the +@code{GLOB_DOOFFS} flag. Otherwise, the offset is always zero +regardless of what is in this field, and the first real element comes at +the beginning of the vector. + +@item gl_closedir +The address of an alternative implementation of the @code{closedir} +function. It is used if the @code{GLOB_ALTDIRFUNC} bit is set in +the flag parameter. The type of this field is +@w{@code{void (*) (void *)}}. + +This is a GNU extension. + +@item gl_readdir +The address of an alternative implementation of the @code{readdir64} +function used to read the contents of a directory. It is used if the +@code{GLOB_ALTDIRFUNC} bit is set in the flag parameter. The type of +this field is @w{@code{struct dirent64 *(*) (void *)}}. + +This is a GNU extension. + +@item gl_opendir +The address of an alternative implementation of the @code{opendir} +function. It is used if the @code{GLOB_ALTDIRFUNC} bit is set in +the flag parameter. The type of this field is +@w{@code{void *(*) (const char *)}}. + +This is a GNU extension. + +@item gl_stat +The address of an alternative implementation of the @code{stat64} function +to get information about an object in the filesystem. It is used if the +@code{GLOB_ALTDIRFUNC} bit is set in the flag parameter. The type of +this field is @w{@code{int (*) (const char *, struct stat64 *)}}. + +This is a GNU extension. + +@item gl_lstat +The address of an alternative implementation of the @code{lstat64} +function to get information about an object in the filesystems, not +following symbolic links. 
It is used if the @code{GLOB_ALTDIRFUNC} bit +is set in the flag parameter. The type of this field is @code{@w{int +(*) (const char *,} @w{struct stat64 *)}}. + +This is a GNU extension. + +@item gl_flags +The flags used when @code{glob} was called. In addition, @code{GLOB_MAGCHAR} +might be set. See @ref{Flags for Globbing} for more details. + +This is a GNU extension. +@end table +@end deftp + +@comment glob.h +@comment POSIX.2 +@deftypefun int glob (const char *@var{pattern}, int @var{flags}, int (*@var{errfunc}) (const char *@var{filename}, int @var{error-code}), glob_t *@var{vector-ptr}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtsenv{} @mtascusig{:ALRM} @mtascutimer{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c glob @mtasurace:utent @mtsenv @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c strlen dup ok +@c strchr dup ok +@c malloc dup @ascuheap @acsmem +@c mempcpy dup ok +@c next_brace_sub ok +@c free dup @ascuheap @acsmem +@c globfree dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c glob_pattern_p ok +@c glob_pattern_type dup ok +@c getenv dup @mtsenv +@c GET_LOGIN_NAME_MAX ok +@c getlogin_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c GETPW_R_SIZE_MAX ok +@c getpwnam_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c memcpy dup ok +@c memchr dup ok +@c *pglob->gl_stat user-supplied +@c stat64 dup ok +@c S_ISDIR dup ok +@c strdup dup @ascuheap @acsmem +@c glob_pattern_type ok +@c glob_in_dir @mtsenv @mtslocale @asucorrupt @ascuheap @acucorrupt @acsfd @acsmem +@c strlen dup ok +@c glob_pattern_type dup ok +@c malloc dup @ascuheap @acsmem +@c mempcpy dup ok +@c *pglob->gl_stat 
user-supplied +@c stat64 dup ok +@c free dup @ascuheap @acsmem +@c *pglob->gl_opendir user-supplied +@c opendir dup @ascuheap @acsmem @acsfd +@c dirfd dup ok +@c *pglob->gl_readdir user-supplied +@c CONVERT_DIRENT_DIRENT64 ok +@c readdir64 ok [protected by exclusive use of the stream] +@c REAL_DIR_ENTRY ok +@c DIRENT_MIGHT_BE_DIR ok +@c fnmatch dup @mtsenv @mtslocale @ascuheap @acsmem +@c DIRENT_MIGHT_BE_SYMLINK ok +@c link_exists_p ok +@c link_exists2_p ok +@c strlen dup ok +@c mempcpy dup ok +@c *pglob->gl_stat user-supplied +@c fxstatat64 dup ok +@c realloc dup @ascuheap @acsmem +@c pglob->gl_closedir user-supplied +@c closedir @ascuheap @acsmem @acsfd +@c prefix_array dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c mempcpy dup ok +@c strcpy dup ok +The function @code{glob} does globbing using the pattern @var{pattern} +in the current directory. It puts the result in a newly allocated +vector, and stores the size and address of this vector into +@code{*@var{vector-ptr}}. The argument @var{flags} is a combination of +bit flags; see @ref{Flags for Globbing}, for details of the flags. + +The result of globbing is a sequence of file names. The function +@code{glob} allocates a string for each resulting word, then +allocates a vector of type @code{char **} to store the addresses of +these strings. The last element of the vector is a null pointer. +This vector is called the @dfn{word vector}. + +To return this vector, @code{glob} stores both its address and its +length (number of elements, not counting the terminating null pointer) +into @code{*@var{vector-ptr}}. + +Normally, @code{glob} sorts the file names alphabetically before +returning them. You can turn this off with the flag @code{GLOB_NOSORT} +if you want to get the information as fast as possible. 
Usually it's +a good idea to let @code{glob} sort them---if you process the files in +alphabetical order, the users will have a feel for the rate of progress +that your application is making. + +If @code{glob} succeeds, it returns 0. Otherwise, it returns one +of these error codes: + +@vtable @code +@comment glob.h +@comment POSIX.2 +@item GLOB_ABORTED +There was an error opening a directory, and you used the flag +@code{GLOB_ERR} or your specified @var{errfunc} returned a nonzero +value. +@iftex +See below +@end iftex +@ifinfo +@xref{Flags for Globbing}, +@end ifinfo +for an explanation of the @code{GLOB_ERR} flag and @var{errfunc}. + +@comment glob.h +@comment POSIX.2 +@item GLOB_NOMATCH +The pattern didn't match any existing files. If you use the +@code{GLOB_NOCHECK} flag, then you never get this error code, because +that flag tells @code{glob} to @emph{pretend} that the pattern matched +at least one file. + +@comment glob.h +@comment POSIX.2 +@item GLOB_NOSPACE +It was impossible to allocate memory to hold the result. +@end vtable + +In the event of an error, @code{glob} stores information in +@code{*@var{vector-ptr}} about all the matches it has found so far. + +It is important to notice that the @code{glob} function will not fail if +it encounters directories or files which cannot be handled without the +LFS interfaces. The implementation of @code{glob} is supposed to use +these functions internally. This at least is the assumption made by +the Unix standard. The GNU extension of allowing the user to provide their +own directory handling and @code{stat} functions complicates things a +bit. If these callback functions are used and a large file or directory +is encountered @code{glob} @emph{can} fail. 
+@end deftypefun + +@comment glob.h +@comment GNU +@deftypefun int glob64 (const char *@var{pattern}, int @var{flags}, int (*@var{errfunc}) (const char *@var{filename}, int @var{error-code}), glob64_t *@var{vector-ptr}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtsenv{} @mtascusig{:ALRM} @mtascutimer{} @mtslocale{}}@asunsafe{@ascudlopen{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Same code as glob, but with glob64_t #defined as glob_t. +The @code{glob64} function was added as part of the Large File Summit +extensions but is not part of the original LFS proposal. The reason for +this is simple: it is not necessary. The necessity for a @code{glob64} +function is added by the extensions of the GNU @code{glob} +implementation which allows the user to provide their own directory handling +and @code{stat} functions. The @code{readdir} and @code{stat} functions +do depend on the choice of @code{_FILE_OFFSET_BITS} since the definition +of the types @code{struct dirent} and @code{struct stat} will change +depending on the choice. + +Besides this difference, @code{glob64} works just like @code{glob} in +all aspects. + +This function is a GNU extension. +@end deftypefun + +@node Flags for Globbing +@subsection Flags for Globbing + +This section describes the standard flags that you can specify in the +@var{flags} argument to @code{glob}. Choose the flags you want, +and combine them with the C bitwise OR operator @code{|}. + +Note that there are @ref{More Flags for Globbing} available as GNU extensions. + +@vtable @code +@comment glob.h +@comment POSIX.2 +@item GLOB_APPEND +Append the words from this expansion to the vector of words produced by +previous calls to @code{glob}. This way you can effectively expand +several words as if they were concatenated with spaces between them. + +In order for appending to work, you must not modify the contents of the +word vector structure between calls to @code{glob}. 
And, if you set +@code{GLOB_DOOFFS} in the first call to @code{glob}, you must also +set it when you append to the results. + +Note that the pointer stored in @code{gl_pathv} may no longer be valid +after you call @code{glob} the second time, because @code{glob} might +have relocated the vector. So always fetch @code{gl_pathv} from the +@code{glob_t} structure after each @code{glob} call; @strong{never} save +the pointer across calls. + +@comment glob.h +@comment POSIX.2 +@item GLOB_DOOFFS +Leave blank slots at the beginning of the vector of words. +The @code{gl_offs} field says how many slots to leave. +The blank slots contain null pointers. + +@comment glob.h +@comment POSIX.2 +@item GLOB_ERR +Give up right away and report an error if there is any difficulty +reading the directories that must be read in order to expand @var{pattern} +fully. Such difficulties might include a directory in which you don't +have the requisite access. Normally, @code{glob} tries its best to keep +on going despite any errors, reading whatever directories it can. + +You can exercise even more control than this by specifying an +error-handler function @var{errfunc} when you call @code{glob}. If +@var{errfunc} is not a null pointer, then @code{glob} doesn't give up +right away when it can't read a directory; instead, it calls +@var{errfunc} with two arguments, like this: + +@smallexample +(*@var{errfunc}) (@var{filename}, @var{error-code}) +@end smallexample + +@noindent +The argument @var{filename} is the name of the directory that +@code{glob} couldn't open or couldn't read, and @var{error-code} is the +@code{errno} value that was reported to @code{glob}. + +If the error handler function returns nonzero, then @code{glob} gives up +right away. Otherwise, it continues. + +@comment glob.h +@comment POSIX.2 +@item GLOB_MARK +If the pattern matches the name of a directory, append @samp{/} to the +directory's name when returning it. 
+ +@comment glob.h +@comment POSIX.2 +@item GLOB_NOCHECK +If the pattern doesn't match any file names, return the pattern itself +as if it were a file name that had been matched. (Normally, when the +pattern doesn't match anything, @code{glob} returns that there were no +matches.) + +@comment glob.h +@comment POSIX.2 +@item GLOB_NOESCAPE +Don't treat the @samp{\} character specially in patterns. Normally, +@samp{\} quotes the following character, turning off its special meaning +(if any) so that it matches only itself. When quoting is enabled, the +pattern @samp{\?} matches only the string @samp{?}, because the question +mark in the pattern acts like an ordinary character. + +If you use @code{GLOB_NOESCAPE}, then @samp{\} is an ordinary character. + +@code{glob} does its work by calling the function @code{fnmatch} +repeatedly. It handles the flag @code{GLOB_NOESCAPE} by turning on the +@code{FNM_NOESCAPE} flag in calls to @code{fnmatch}. + +@comment glob.h +@comment POSIX.2 +@item GLOB_NOSORT +Don't sort the file names; return them in no particular order. +(In practice, the order will depend on the order of the entries in +the directory.) The only reason @emph{not} to sort is to save time. +@end vtable + +@node More Flags for Globbing +@subsection More Flags for Globbing + +Besides the flags described in the last section, the GNU implementation of +@code{glob} allows a few more flags which are also defined in the +@file{glob.h} file. Some of the extensions implement functionality +which is available in modern shell implementations. + +@vtable @code +@comment glob.h +@comment GNU +@item GLOB_PERIOD +The @code{.} character (period) is treated specially. It cannot be +matched by wildcards. @xref{Wildcard Matching}, @code{FNM_PERIOD}. + +@comment glob.h +@comment GNU +@item GLOB_MAGCHAR +The @code{GLOB_MAGCHAR} value is not to be given to @code{glob} in the +@var{flags} parameter.
Instead, @code{glob} sets this bit in the +@code{gl_flags} element of the @code{glob_t} structure provided as the +result if the pattern used for matching contains any wildcard character. + +@comment glob.h +@comment GNU +@item GLOB_ALTDIRFUNC +Instead of using the normal functions for accessing the +filesystem the @code{glob} implementation uses the user-supplied +functions specified in the structure pointed to by the @var{pglob} +parameter. For more information about the functions, refer to the +sections about directory handling: see @ref{Accessing Directories}, and +@ref{Reading Attributes}. + +@comment glob.h +@comment GNU +@item GLOB_BRACE +If this flag is given, the handling of braces in the pattern is changed. +It is now required that braces appear correctly grouped. I.e., for each +opening brace there must be a closing one. Braces can be used +recursively. So it is possible to define one brace expression in +another one. It is important to note that the range of each brace +expression is completely contained in the outer brace expression (if +there is one). + +The string between the matching braces is separated into single +expressions by splitting at @code{,} (comma) characters. The commas +themselves are discarded. Please note what we said above about recursive +brace expressions. The commas used to separate the subexpressions must +be at the same level. Commas in brace subexpressions are not matched. +They are used during expansion of the brace expression of the deeper +level. The example below shows this: + +@smallexample +glob ("@{foo/@{,bar,biz@},baz@}", GLOB_BRACE, NULL, &result) +@end smallexample + +@noindent +is equivalent to the sequence + +@smallexample +glob ("foo/", GLOB_BRACE, NULL, &result) +glob ("foo/bar", GLOB_BRACE|GLOB_APPEND, NULL, &result) +glob ("foo/biz", GLOB_BRACE|GLOB_APPEND, NULL, &result) +glob ("baz", GLOB_BRACE|GLOB_APPEND, NULL, &result) +@end smallexample + +@noindent +if we leave aside error handling.
+ +@comment glob.h +@comment GNU +@item GLOB_NOMAGIC +If the pattern contains no wildcard constructs (it is a literal file name), +return it as the sole ``matching'' word, even if no file exists by that name. + +@comment glob.h +@comment GNU +@item GLOB_TILDE +If this flag is used the character @code{~} (tilde) is handled specially +if it appears at the beginning of the pattern. Instead of being taken +verbatim it is used to represent the home directory of a known user. + +If @code{~} is the only character in the pattern or it is followed by a +@code{/} (slash), the home directory of the process owner is +substituted. Using @code{getlogin} and @code{getpwnam} the information +is read from the system databases. As an example take user @code{bart} +with his home directory at @file{/home/bart}. For him a call like + +@smallexample +glob ("~/bin/*", GLOB_TILDE, NULL, &result) +@end smallexample + +@noindent +would return the contents of the directory @file{/home/bart/bin}. +Instead of referring to one's own home directory it is also possible to +name the home directory of other users. To do so one has to append the +user name after the tilde character. So the contents of user +@code{homer}'s @file{bin} directory can be retrieved by + +@smallexample +glob ("~homer/bin/*", GLOB_TILDE, NULL, &result) +@end smallexample + +If the user name is not valid or the home directory cannot be determined +for some reason the pattern is left untouched and itself used as the +result. I.e., if in the last example @code{homer} is not available the +tilde expansion yields @code{"~homer/bin/*"} and @code{glob} is not +looking for a directory named @code{~homer}. + +This functionality is equivalent to what is available in C-shells if the +@code{nonomatch} flag is set. + +@comment glob.h +@comment GNU +@item GLOB_TILDE_CHECK +If this flag is used @code{glob} behaves as if @code{GLOB_TILDE} is +given.
The only difference is that if the user name is not available or +the home directory cannot be determined for other reasons this leads to +an error. @code{glob} will return @code{GLOB_NOMATCH} instead of using +the pattern itself as the name. + +This functionality is equivalent to what is available in C-shells if +the @code{nonomatch} flag is not set. + +@comment glob.h +@comment GNU +@item GLOB_ONLYDIR +If this flag is used the globbing function takes this as a +@strong{hint} that the caller is only interested in directories +matching the pattern. If the information about the type of the file +is easily available non-directories will be rejected but no extra +work will be done to determine the information for each file. I.e., +the caller must still be able to filter out non-directories. + +This functionality is only available with the GNU @code{glob} +implementation. It is mainly used internally to increase the +performance but might be useful for a user as well and therefore is +documented here. +@end vtable + +Calling @code{glob} will in most cases allocate resources which are used +to represent the result of the function call. If the same object of +type @code{glob_t} is used in multiple calls to @code{glob} the resources +are freed or reused so that no leaks appear. But this does not include +the time when all @code{glob} calls are done. + +@comment glob.h +@comment POSIX.2 +@deftypefun void globfree (glob_t *@var{pglob}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c globfree dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c free dup @ascuheap @acsmem +The @code{globfree} function frees all resources allocated by previous +calls to @code{glob} associated with the object pointed to by +@var{pglob}. This function should be called whenever the currently used +@code{glob_t} typed object isn't used anymore.
+@end deftypefun + +@comment glob.h +@comment GNU +@deftypefun void globfree64 (glob64_t *@var{pglob}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +This function is equivalent to @code{globfree} but it frees records of +type @code{glob64_t} which were allocated by @code{glob64}. +@end deftypefun + + +@node Regular Expressions +@section Regular Expression Matching + +@Theglibc{} supports two interfaces for matching regular +expressions. One is the standard POSIX.2 interface, and the other is +what @theglibc{} has had for many years. + +Both interfaces are declared in the header file @file{regex.h}. +If you define @w{@code{_POSIX_C_SOURCE}}, then only the POSIX.2 +functions, structures, and constants are declared. +@c !!! we only document the POSIX.2 interface here!! + +@menu +* POSIX Regexp Compilation:: Using @code{regcomp} to prepare to match. +* Flags for POSIX Regexps:: Syntax variations for @code{regcomp}. +* Matching POSIX Regexps:: Using @code{regexec} to match the compiled + pattern that you get from @code{regcomp}. +* Regexp Subexpressions:: Finding which parts of the string were matched. +* Subexpression Complications:: Find points of which parts were matched. +* Regexp Cleanup:: Freeing storage; reporting errors. +@end menu + +@node POSIX Regexp Compilation +@subsection POSIX Regular Expression Compilation + +Before you can actually match a regular expression, you must +@dfn{compile} it. This is not true compilation---it produces a special +data structure, not machine instructions. But it is like ordinary +compilation in that its purpose is to enable you to ``execute'' the +pattern fast. (@xref{Matching POSIX Regexps}, for how to use the +compiled regular expression for matching.) + +There is a special data type for compiled regular expressions: + +@comment regex.h +@comment POSIX.2 +@deftp {Data Type} regex_t +This type of object holds a compiled regular expression. 
+It is actually a structure. It has just one field that your programs +should look at: + +@table @code +@item re_nsub +This field holds the number of parenthetical subexpressions in the +regular expression that was compiled. +@end table + +There are several other fields, but we don't describe them here, because +only the functions in the library should use them. +@end deftp + +After you create a @code{regex_t} object, you can compile a regular +expression into it by calling @code{regcomp}. + +@comment regex.h +@comment POSIX.2 +@deftypefun int regcomp (regex_t *restrict @var{compiled}, const char *restrict @var{pattern}, int @var{cflags}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c All of the issues have to do with memory allocation and multi-byte +@c character handling present in the input string, or implied by ranges +@c or inverted character classes. +@c (re_)malloc @ascuheap @acsmem +@c re_compile_internal @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c (re_)realloc @ascuheap @acsmem [no @asucorrupt @acucorrupt for we zero the buffer] +@c init_dfa @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c (re_)malloc @ascuheap @acsmem +@c calloc @ascuheap @acsmem +@c _NL_CURRENT ok +@c _NL_CURRENT_WORD ok +@c btowc @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c libc_lock_init ok +@c re_string_construct @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_construct_common ok +@c re_string_realloc_buffers @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c build_wcs_upper_buffer @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c isascii ok +@c mbsinit ok +@c toupper ok +@c mbrtowc dup @asucorrupt @ascuheap @asulock @ascudlopen 
@acucorrupt @aculock @acsmem @acsfd +@c iswlower @mtslocale +@c towupper @mtslocale +@c wcrtomb dup @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c (re_)malloc dup @ascuheap @acsmem +@c build_upper_buffer ok (@mtslocale but optimized) +@c islower ok +@c toupper ok +@c build_wcs_buffer @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c mbrtowc dup @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_translate_buffer ok +@c parse @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c fetch_token @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c peek_token @mtslocale +@c re_string_eoi ok +@c re_string_peek_byte ok +@c re_string_cur_idx ok +@c re_string_length ok +@c re_string_peek_byte_case @mtslocale +@c re_string_peek_byte dup ok +@c re_string_is_single_byte_char ok +@c isascii ok +@c re_string_peek_byte dup ok +@c re_string_wchar_at ok +@c re_string_skip_bytes ok +@c re_string_skip_bytes dup ok +@c parse_reg_exp @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c parse_branch @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c parse_expression @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c create_token_tree dup @ascuheap @acsmem +@c re_string_eoi dup ok +@c re_string_first_byte ok +@c fetch_token dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c create_tree dup @ascuheap @acsmem +@c parse_sub_exp @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c fetch_token dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c parse_reg_exp dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd 
+@c postorder() @ascuheap @acsmem +@c free_tree @ascuheap @acsmem +@c free_token dup @ascuheap @acsmem +@c create_tree dup @ascuheap @acsmem +@c parse_bracket_exp @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c _NL_CURRENT dup ok +@c _NL_CURRENT_WORD dup ok +@c calloc dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c peek_token_bracket ok +@c re_string_eoi dup ok +@c re_string_peek_byte dup ok +@c re_string_first_byte dup ok +@c re_string_cur_idx dup ok +@c re_string_length dup ok +@c re_string_skip_bytes dup ok +@c bitset_set ok +@c re_string_skip_bytes ok +@c parse_bracket_element @mtslocale +@c re_string_char_size_at ok +@c re_string_wchar_at dup ok +@c re_string_skip_bytes dup ok +@c parse_bracket_symbol @mtslocale +@c re_string_eoi dup ok +@c re_string_fetch_byte_case @mtslocale +@c re_string_fetch_byte ok +@c re_string_first_byte dup ok +@c isascii ok +@c re_string_char_size_at dup ok +@c re_string_skip_bytes dup ok +@c re_string_fetch_byte dup ok +@c re_string_peek_byte dup ok +@c re_string_skip_bytes dup ok +@c peek_token_bracket dup ok +@c auto build_range_exp @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c auto lookup_collation_sequence_value @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c btowc dup @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c collseq_table_lookup ok +@c auto seek_collating_symbol_entry dup ok +@c (re_)realloc dup @ascuheap @acsmem +@c collseq_table_lookup dup ok +@c bitset_set dup ok +@c (re_)realloc dup @ascuheap @acsmem +@c build_equiv_class @mtslocale @ascuheap @acsmem +@c _NL_CURRENT ok +@c auto findidx ok +@c bitset_set dup ok +@c (re_)realloc dup @ascuheap @acsmem +@c auto build_collating_symbol @ascuheap @acsmem +@c auto seek_collating_symbol_entry ok +@c bitset_set dup ok +@c (re_)realloc dup @ascuheap @acsmem +@c build_charclass @mtslocale @ascuheap @acsmem +@c 
(re_)realloc dup @ascuheap @acsmem +@c bitset_set dup ok +@c isalnum ok +@c iscntrl ok +@c isspace ok +@c isalpha ok +@c isdigit ok +@c isprint ok +@c isupper ok +@c isblank ok +@c isgraph ok +@c ispunct ok +@c isxdigit ok +@c bitset_not ok +@c bitset_mask ok +@c create_token_tree dup @ascuheap @acsmem +@c create_tree dup @ascuheap @acsmem +@c free_charset dup @ascuheap @acsmem +@c init_word_char @mtslocale +@c isalnum ok +@c build_charclass_op @mtslocale @ascuheap @acsmem +@c calloc dup @ascuheap @acsmem +@c build_charclass dup @mtslocale @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c free_charset dup @ascuheap @acsmem +@c bitset_set dup ok +@c bitset_not dup ok +@c bitset_mask dup ok +@c create_token_tree dup @ascuheap @acsmem +@c create_tree dup @ascuheap @acsmem +@c parse_dup_op @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_cur_idx dup ok +@c fetch_number @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c fetch_token dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_set_index ok +@c postorder() @ascuheap @acsmem +@c free_tree dup @ascuheap @acsmem +@c mark_opt_subexp ok +@c duplicate_tree @ascuheap @acsmem +@c create_token_tree dup @ascuheap @acsmem +@c create_tree dup @ascuheap @acsmem +@c postorder() @ascuheap @acsmem +@c free_tree dup @ascuheap @acsmem +@c fetch_token dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c parse_branch dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c create_tree dup @ascuheap @acsmem +@c create_tree @ascuheap @acsmem +@c create_token_tree @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c analyze @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c preorder() @ascuheap @acsmem +@c optimize_subexps ok +@c calc_next ok +@c link_nfa_nodes @ascuheap @acsmem 
+@c re_node_set_init_1 @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c re_node_set_init_2 @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c postorder() @ascuheap @acsmem +@c lower_subexps @ascuheap @acsmem +@c lower_subexp @ascuheap @acsmem +@c create_tree dup @ascuheap @acsmem +@c calc_first @ascuheap @acsmem +@c re_dfa_add_node @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c re_node_set_init_empty ok +@c calc_eclosure @ascuheap @acsmem +@c calc_eclosure_iter @ascuheap @acsmem +@c re_node_set_alloc @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c duplicate_node_closure @ascuheap @acsmem +@c re_node_set_empty ok +@c duplicate_node @ascuheap @acsmem +@c re_dfa_add_node dup @ascuheap @acsmem +@c re_node_set_insert @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c search_duplicated_node ok +@c re_node_set_merge @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c re_node_set_free @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c re_node_set_insert dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c calc_inveclosure @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c re_node_set_insert_last @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c optimize_utf8 ok +@c create_initial_state @ascuheap @acsmem +@c re_node_set_init_copy @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c re_node_set_contains ok +@c re_node_set_merge dup @ascuheap @acsmem +@c re_acquire_state_context @ascuheap @acsmem +@c calc_state_hash ok +@c re_node_set_compare ok +@c create_cd_newstate @ascuheap @acsmem +@c calloc dup @ascuheap @acsmem +@c re_node_set_init_copy dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c free_state @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c NOT_SATISFY_PREV_CONSTRAINT ok +@c re_node_set_remove_at ok +@c register_state @ascuheap @acsmem +@c re_node_set_alloc 
dup @ascuheap @acsmem +@c re_node_set_insert_last dup @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c free_workarea_compile @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c re_string_destruct @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c free_dfa_content @ascuheap @acsmem +@c free_token @ascuheap @acsmem +@c free_charset @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_compile_fastmap @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_compile_fastmap_iter @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_set_fastmap ok +@c tolower ok +@c mbrtowc dup @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c wcrtomb dup @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c towlower @mtslocale +@c _NL_CURRENT ok +@c (re_)free @ascuheap @acsmem +The function @code{regcomp} ``compiles'' a regular expression into a +data structure that you can use with @code{regexec} to match against a +string. The compiled regular expression format is designed for +efficient matching. @code{regcomp} stores it into @code{*@var{compiled}}. + +It's up to you to allocate an object of type @code{regex_t} and pass its +address to @code{regcomp}. + +The argument @var{cflags} lets you specify various options that control +the syntax and semantics of regular expressions. @xref{Flags for POSIX +Regexps}. + +If you use the flag @code{REG_NOSUB}, then @code{regcomp} omits from +the compiled regular expression the information necessary to record +how subexpressions actually match. In this case, you might as well +pass @code{0} for the @var{matchptr} and @var{nmatch} arguments when +you call @code{regexec}. 
+ +If you don't use @code{REG_NOSUB}, then the compiled regular expression +does have the capacity to record how subexpressions match. Also, +@code{regcomp} tells you how many subexpressions @var{pattern} has, by +storing the number in @code{@var{compiled}->re_nsub}. You can use that +value to decide how long an array to allocate to hold information about +subexpression matches. + +@code{regcomp} returns @code{0} if it succeeds in compiling the regular +expression; otherwise, it returns a nonzero error code (see the table +below). You can use @code{regerror} to produce an error message string +describing the reason for a nonzero value; see @ref{Regexp Cleanup}. + +@end deftypefun + +Here are the possible nonzero values that @code{regcomp} can return: + +@vtable @code +@comment regex.h +@comment POSIX.2 +@item REG_BADBR +There was an invalid @samp{\@{@dots{}\@}} construct in the regular +expression. A valid @samp{\@{@dots{}\@}} construct must contain either +a single number, or two numbers in increasing order separated by a +comma. + +@comment regex.h +@comment POSIX.2 +@item REG_BADPAT +There was a syntax error in the regular expression. + +@comment regex.h +@comment POSIX.2 +@item REG_BADRPT +A repetition operator such as @samp{?} or @samp{*} appeared in a bad +position (with no preceding subexpression to act on). + +@comment regex.h +@comment POSIX.2 +@item REG_ECOLLATE +The regular expression referred to an invalid collating element (one not +defined in the current locale for string collation). @xref{Locale +Categories}. + +@comment regex.h +@comment POSIX.2 +@item REG_ECTYPE +The regular expression referred to an invalid character class name. + +@comment regex.h +@comment POSIX.2 +@item REG_EESCAPE +The regular expression ended with @samp{\}. + +@comment regex.h +@comment POSIX.2 +@item REG_ESUBREG +There was an invalid number in the @samp{\@var{digit}} construct. 
+ +@comment regex.h +@comment POSIX.2 +@item REG_EBRACK +There were unbalanced square brackets in the regular expression. + +@comment regex.h +@comment POSIX.2 +@item REG_EPAREN +An extended regular expression had unbalanced parentheses, +or a basic regular expression had unbalanced @samp{\(} and @samp{\)}. + +@comment regex.h +@comment POSIX.2 +@item REG_EBRACE +The regular expression had unbalanced @samp{\@{} and @samp{\@}}. + +@comment regex.h +@comment POSIX.2 +@item REG_ERANGE +One of the endpoints in a range expression was invalid. + +@comment regex.h +@comment POSIX.2 +@item REG_ESPACE +@code{regcomp} ran out of memory. +@end vtable + +@node Flags for POSIX Regexps +@subsection Flags for POSIX Regular Expressions + +These are the bit flags that you can use in the @var{cflags} operand when +compiling a regular expression with @code{regcomp}. + +@vtable @code +@comment regex.h +@comment POSIX.2 +@item REG_EXTENDED +Treat the pattern as an extended regular expression, rather than as a +basic regular expression. + +@comment regex.h +@comment POSIX.2 +@item REG_ICASE +Ignore case when matching letters. + +@comment regex.h +@comment POSIX.2 +@item REG_NOSUB +Don't bother storing the contents of the @var{matchptr} array. + +@comment regex.h +@comment POSIX.2 +@item REG_NEWLINE +Treat a newline in @var{string} as dividing @var{string} into multiple +lines, so that @samp{$} can match before the newline and @samp{^} can +match after. Also, don't permit @samp{.} to match a newline, and don't +permit @samp{[^@dots{}]} to match a newline. + +Otherwise, newline acts like any other ordinary character. +@end vtable + +@node Matching POSIX Regexps +@subsection Matching a Compiled POSIX Regular Expression + +Once you have compiled a regular expression, as described in @ref{POSIX +Regexp Compilation}, you can match it against strings using +@code{regexec}. 
A match anywhere inside the string counts as success, +unless the regular expression contains anchor characters (@samp{^} or +@samp{$}). + +@comment regex.h +@comment POSIX.2 +@deftypefun int regexec (const regex_t *restrict @var{compiled}, const char *restrict @var{string}, size_t @var{nmatch}, regmatch_t @var{matchptr}[restrict], int @var{eflags}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c libc_lock_lock @asulock @aculock +@c re_search_internal @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_allocate @ascuheap @acsmem +@c re_string_construct_common dup ok +@c re_string_realloc_buffers dup @ascuheap @acsmem +@c match_ctx_init @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c re_string_byte_at ok +@c re_string_first_byte dup ok +@c check_matching @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_cur_idx dup ok +@c acquire_init_state_context dup @ascuheap @acsmem +@c re_string_context_at ok +@c re_string_byte_at dup ok +@c bitset_contain ok +@c re_acquire_state_context dup @ascuheap @acsmem +@c check_subexp_matching_top @ascuheap @acsmem +@c match_ctx_add_subtop @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c calloc dup @ascuheap @acsmem +@c transit_state_bkref @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_cur_idx dup ok +@c re_string_context_at dup ok +@c NOT_SATISFY_NEXT_CONSTRAINT ok +@c get_subexp @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_get_buffer ok +@c search_cur_bkref_entry ok +@c clean_state_log_if_needed @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c extend_buffers @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem 
@acsfd +@c re_string_realloc_buffers dup @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c build_wcs_upper_buffer dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c build_upper_buffer dup ok (@mtslocale but optimized) +@c build_wcs_buffer dup @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_translate_buffer dup ok +@c get_subexp_sub @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c check_arrival @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c (re_)realloc dup @ascuheap @acsmem +@c re_string_context_at dup ok +@c re_node_set_init_1 dup @ascuheap @acsmem +@c check_arrival_expand_ecl @ascuheap @acsmem +@c re_node_set_alloc dup @ascuheap @acsmem +@c find_subexp_node ok +@c re_node_set_merge dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c check_arrival_expand_ecl_sub @ascuheap @acsmem +@c re_node_set_contains dup ok +@c re_node_set_insert dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_node_set_init_copy dup @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c expand_bkref_cache @ascuheap @acsmem +@c search_cur_bkref_entry dup ok +@c re_node_set_contains dup ok +@c re_node_set_init_1 dup @ascuheap @acsmem +@c check_arrival_expand_ecl dup @ascuheap @acsmem +@c re_node_set_merge dup @ascuheap @acsmem +@c re_node_set_init_copy dup @ascuheap @acsmem +@c re_node_set_insert dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_acquire_state @ascuheap @acsmem +@c calc_state_hash dup ok +@c re_node_set_compare dup ok +@c create_ci_newstate @ascuheap @acsmem +@c calloc dup @ascuheap @acsmem +@c re_node_set_init_copy dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c register_state dup @ascuheap @acsmem +@c free_state dup @ascuheap @acsmem +@c re_acquire_state_context dup @ascuheap @acsmem +@c re_node_set_merge dup 
@ascuheap @acsmem +@c check_arrival_add_next_nodes @mtslocale @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c check_node_accept_bytes @mtslocale @ascuheap @acsmem +@c re_string_byte_at dup ok +@c re_string_char_size_at dup ok +@c re_string_elem_size_at @mtslocale +@c _NL_CURRENT_WORD dup ok +@c _NL_CURRENT dup ok +@c auto findidx dup ok +@c _NL_CURRENT_WORD dup ok +@c _NL_CURRENT dup ok +@c collseq_table_lookup dup ok +@c find_collation_sequence_value @mtslocale +@c _NL_CURRENT_WORD dup ok +@c _NL_CURRENT dup ok +@c auto findidx dup ok +@c wcscoll @mtslocale @ascuheap @acsmem +@c re_node_set_empty dup ok +@c re_node_set_merge dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_node_set_insert dup @ascuheap @acsmem +@c re_acquire_state dup @ascuheap @acsmem +@c check_node_accept ok +@c re_string_byte_at dup ok +@c bitset_contain dup ok +@c re_string_context_at dup ok +@c NOT_SATISFY_NEXT_CONSTRAINT dup ok +@c match_ctx_add_entry @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c clean_state_log_if_needed dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c extend_buffers dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c find_subexp_node dup ok +@c calloc dup @ascuheap @acsmem +@c check_arrival dup *** +@c match_ctx_add_sublast @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c re_acquire_state_context dup @ascuheap @acsmem +@c re_node_set_init_union @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c re_node_set_init_copy dup @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c re_node_set_free dup @ascuheap @acsmem +@c check_subexp_matching_top dup @ascuheap @acsmem +@c check_halt_state_context ok +@c re_string_context_at dup ok +@c check_halt_node_context ok +@c NOT_SATISFY_NEXT_CONSTRAINT dup ok +@c re_string_eoi dup ok +@c extend_buffers dup @mtslocale @asucorrupt @ascuheap 
@asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c transit_state @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c transit_state_mb @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_context_at dup ok +@c NOT_SATISFY_NEXT_CONSTRAINT dup ok +@c check_node_accept_bytes dup @mtslocale @ascuheap @acsmem +@c re_string_cur_idx dup ok +@c clean_state_log_if_needed @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_node_set_init_union dup @ascuheap @acsmem +@c re_acquire_state_context dup @ascuheap @acsmem +@c re_string_fetch_byte dup ok +@c re_string_context_at dup ok +@c build_trtable @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c group_nodes_into_DFAstates @ascuheap @acsmem +@c bitset_empty dup ok +@c bitset_set dup ok +@c bitset_merge dup ok +@c bitset_set_all ok +@c bitset_clear ok +@c bitset_contain dup ok +@c bitset_copy ok +@c re_node_set_init_copy dup @ascuheap @acsmem +@c re_node_set_insert dup @ascuheap @acsmem +@c re_node_set_init_1 dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_node_set_alloc dup @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c bitset_empty ok +@c re_node_set_empty dup ok +@c re_node_set_merge dup @ascuheap @acsmem +@c re_acquire_state_context dup @ascuheap @acsmem +@c bitset_merge ok +@c calloc dup @ascuheap @acsmem +@c bitset_contain dup ok +@c merge_state_with_log @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c re_string_cur_idx dup ok +@c re_node_set_init_union dup @ascuheap @acsmem +@c re_string_context_at dup ok +@c re_node_set_free dup @ascuheap @acsmem +@c check_subexp_matching_top @ascuheap @acsmem +@c match_ctx_add_subtop dup @ascuheap @acsmem +@c transit_state_bkref dup @mtslocale @asucorrupt @ascuheap @asulock 
@ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c find_recover_state +@c re_string_cur_idx dup ok +@c re_string_skip_bytes dup ok +@c merge_state_with_log dup @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c check_halt_state_context dup ok +@c prune_impossible_nodes @mtslocale @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c sift_ctx_init ok +@c re_node_set_init_empty dup ok +@c sift_states_backward @mtslocale @ascuheap @acsmem +@c re_node_set_init_1 dup @ascuheap @acsmem +@c update_cur_sifted_state @mtslocale @ascuheap @acsmem +@c add_epsilon_src_nodes @ascuheap @acsmem +@c re_acquire_state dup @ascuheap @acsmem +@c re_node_set_alloc dup @ascuheap @acsmem +@c re_node_set_merge dup @ascuheap @acsmem +@c re_node_set_add_intersect @ascuheap @acsmem +@c (re_)realloc dup @ascuheap @acsmem +@c check_subexp_limits @ascuheap @acsmem +@c sub_epsilon_src_nodes @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c re_node_set_contains dup ok +@c re_node_set_add_intersect dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_node_set_remove_at dup ok +@c re_node_set_contains dup ok +@c re_acquire_state dup @ascuheap @acsmem +@c sift_states_bkref @mtslocale @ascuheap @acsmem +@c search_cur_bkref_entry dup ok +@c check_dst_limits ok +@c search_cur_bkref_entry dup ok +@c check_dst_limits_calc_pos ok +@c check_dst_limits_calc_pos_1 ok +@c re_node_set_init_copy dup @ascuheap @acsmem +@c re_node_set_insert dup @ascuheap @acsmem +@c sift_states_backward dup @mtslocale @ascuheap @acsmem +@c merge_state_array dup @ascuheap @acsmem +@c re_node_set_remove ok +@c re_node_set_contains dup ok +@c re_node_set_remove_at dup ok +@c re_node_set_free dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c re_node_set_empty dup ok +@c build_sifted_states @mtslocale @ascuheap @acsmem +@c sift_states_iter_mb @mtslocale @ascuheap @acsmem +@c check_node_accept_bytes dup @mtslocale @ascuheap @acsmem +@c 
check_node_accept dup ok +@c check_dst_limits dup ok +@c re_node_set_insert dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c check_halt_state_context dup ok +@c merge_state_array @ascuheap @acsmem +@c re_node_set_init_union dup @ascuheap @acsmem +@c re_acquire_state dup @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c set_regs @ascuheap @acsmem +@c (re_)malloc dup @ascuheap @acsmem +@c re_node_set_init_empty dup ok +@c free_fail_stack_return @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c update_regs ok +@c re_node_set_free dup @ascuheap @acsmem +@c pop_fail_stack @ascuheap @acsmem +@c re_node_set_free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c match_ctx_free @ascuheap @acsmem +@c match_ctx_clean @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c (re_)free dup @ascuheap @acsmem +@c re_string_destruct dup @ascuheap @acsmem +@c libc_lock_unlock @aculock +This function tries to match the compiled regular expression +@code{*@var{compiled}} against @var{string}. + +@code{regexec} returns @code{0} if the regular expression matches; +otherwise, it returns a nonzero value. See the table below for +what nonzero values mean. You can use @code{regerror} to produce an +error message string describing the reason for a nonzero value; +see @ref{Regexp Cleanup}. + +The argument @var{eflags} is a word of bit flags that enable various +options. + +If you want to get information about what part of @var{string} actually +matched the regular expression or its subexpressions, use the arguments +@var{matchptr} and @var{nmatch}. Otherwise, pass @code{0} for +@var{nmatch}, and @code{NULL} for @var{matchptr}. @xref{Regexp +Subexpressions}. 
+@end deftypefun + +You must match the regular expression with the same set of current +locales that were in effect when you compiled the regular expression. + +The function @code{regexec} accepts the following flags in the +@var{eflags} argument: + +@vtable @code +@comment regex.h +@comment POSIX.2 +@item REG_NOTBOL +Do not regard the beginning of the specified string as the beginning of +a line; more generally, don't make any assumptions about what text might +precede it. + +@comment regex.h +@comment POSIX.2 +@item REG_NOTEOL +Do not regard the end of the specified string as the end of a line; more +generally, don't make any assumptions about what text might follow it. +@end vtable + +Here are the possible nonzero values that @code{regexec} can return: + +@vtable @code +@comment regex.h +@comment POSIX.2 +@item REG_NOMATCH +The pattern didn't match the string. This isn't really an error. + +@comment regex.h +@comment POSIX.2 +@item REG_ESPACE +@code{regexec} ran out of memory. +@end vtable + +@node Regexp Subexpressions +@subsection Match Results with Subexpressions + +When @code{regexec} matches parenthetical subexpressions of +@var{pattern}, it records which parts of @var{string} they match. It +returns that information by storing the offsets into an array whose +elements are structures of type @code{regmatch_t}. The first element of +the array (index @code{0}) records the part of the string that matched +the entire regular expression. Each other element of the array records +the beginning and end of the part that matched a single parenthetical +subexpression. + +@comment regex.h +@comment POSIX.2 +@deftp {Data Type} regmatch_t +This is the data type of the @var{matchptr} array that you pass to +@code{regexec}. It contains two structure fields, as follows: + +@table @code +@item rm_so +The offset in @var{string} of the beginning of a substring. Add this +value to @var{string} to get the address of that part. 
+ +@item rm_eo +The offset in @var{string} of the end of the substring. +@end table +@end deftp + +@comment regex.h +@comment POSIX.2 +@deftp {Data Type} regoff_t +@code{regoff_t} is an alias for another signed integer type. +The fields of @code{regmatch_t} have type @code{regoff_t}. +@end deftp + +The @code{regmatch_t} elements correspond to subexpressions +positionally; the element at index @code{1} records where the first +subexpression matched, the element at index @code{2} records the second +subexpression, and so on. The order of the subexpressions is the order +in which they begin. + +When you call @code{regexec}, you specify how long the @var{matchptr} +array is, with the @var{nmatch} argument. This tells @code{regexec} how +many elements to store. If the actual regular expression has more than +@var{nmatch} subexpressions, then you won't get offset information about +the rest of them. But this doesn't alter whether the pattern matches a +particular string or not. + +If you don't want @code{regexec} to return any information about where +the subexpressions matched, you can either supply @code{0} for +@var{nmatch}, or use the flag @code{REG_NOSUB} when you compile the +pattern with @code{regcomp}. + +@node Subexpression Complications +@subsection Complications in Subexpression Matching + +Sometimes a subexpression matches a substring of no characters. This +happens when @samp{f\(o*\)} matches the string @samp{fum}. (It really +matches just the @samp{f}.) In this case, both of the offsets identify +the point in the string where the null substring was found. In this +example, the offsets are both @code{1}. + +Sometimes the entire regular expression can match without using some of +its subexpressions at all---for example, when @samp{ba\(na\)*} matches the +string @samp{ba}, the parenthetical subexpression is not used. When +this happens, @code{regexec} stores @code{-1} in both fields of the +element for that subexpression.
+ +Sometimes matching the entire regular expression can match a particular +subexpression more than once---for example, when @samp{ba\(na\)*} +matches the string @samp{bananana}, the parenthetical subexpression +matches three times. When this happens, @code{regexec} usually stores +the offsets of the last part of the string that matched the +subexpression. In the case of @samp{bananana}, these offsets are +@code{6} and @code{8}. + +But the last match is not always the one that is chosen. It's more +accurate to say that the last @emph{opportunity} to match is the one +that takes precedence. What this means is that when one subexpression +appears within another, then the results reported for the inner +subexpression reflect whatever happened on the last match of the outer +subexpression. For an example, consider @samp{\(ba\(na\)*s \)*} matching +the string @samp{bananas bas }. The last time the inner expression +actually matches is near the end of the first word. But it is +@emph{considered} again in the second word, and fails to match there. +@code{regexec} reports nonuse of the ``na'' subexpression. + +Another place where this rule applies is when the regular expression +@smallexample +\(ba\(na\)*s \|nefer\(ti\)* \)* +@end smallexample +@noindent +matches @samp{bananas nefertiti}. The ``na'' subexpression does match +in the first word, but it doesn't match in the second word because the +other alternative is used there. Once again, the second repetition of +the outer subexpression overrides the first, and within that second +repetition, the ``na'' subexpression is not used. So @code{regexec} +reports nonuse of the ``na'' subexpression. + +@node Regexp Cleanup +@subsection POSIX Regexp Matching Cleanup + +When you are finished using a compiled regular expression, you can +free the storage it uses by calling @code{regfree}. 
+ +@comment regex.h +@comment POSIX.2 +@deftypefun void regfree (regex_t *@var{compiled}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c (re_)free dup @ascuheap @acsmem +@c free_dfa_content dup @ascuheap @acsmem +Calling @code{regfree} frees all the storage that @code{*@var{compiled}} +points to. This includes various internal fields of the @code{regex_t} +structure that aren't documented in this manual. + +@code{regfree} does not free the object @code{*@var{compiled}} itself. +@end deftypefun + +You should always free the space in a @code{regex_t} structure with +@code{regfree} before using the structure to compile another regular +expression. + +When @code{regcomp} or @code{regexec} reports an error, you can use +the function @code{regerror} to turn it into an error message string. + +@comment regex.h +@comment POSIX.2 +@deftypefun size_t regerror (int @var{errcode}, const regex_t *restrict @var{compiled}, char *restrict @var{buffer}, size_t @var{length}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c regerror calls gettext, strcmp and mempcpy or memcpy. +This function produces an error message string for the error code +@var{errcode}, and stores the string in @var{length} bytes of memory +starting at @var{buffer}. For the @var{compiled} argument, supply the +same compiled regular expression structure that @code{regcomp} or +@code{regexec} was working with when it got the error. Alternatively, +you can supply @code{NULL} for @var{compiled}; you will still get a +meaningful error message, but it might not be as detailed. + +If the error message can't fit in @var{length} bytes (including a +terminating null character), then @code{regerror} truncates it. +The string that @code{regerror} stores is always null-terminated +even if it has been truncated. 
+ +The return value of @code{regerror} is the minimum length needed to +store the entire error message, including its terminating null. If this +does not exceed @var{length}, then the error message was not truncated, and you +can use it. Otherwise, you should call @code{regerror} again with a larger buffer. + +Here is a function which uses @code{regerror}, but always dynamically +allocates a buffer for the error message: + +@smallexample +char *get_regerror (int errcode, regex_t *compiled) +@{ + size_t length = regerror (errcode, compiled, NULL, 0); + char *buffer = xmalloc (length); + (void) regerror (errcode, compiled, buffer, length); + return buffer; +@} +@end smallexample +@end deftypefun + +@node Word Expansion +@section Shell-Style Word Expansion +@cindex word expansion +@cindex expansion of shell words + +@dfn{Word expansion} means the process of splitting a string into +@dfn{words} and substituting for variables, commands, and wildcards +just as the shell does. + +For example, when you write @samp{ls -l foo.c}, this string is split +into three separate words---@samp{ls}, @samp{-l} and @samp{foo.c}. +This is the most basic function of word expansion. + +When you write @samp{ls *.c}, this can become many words, because +the word @samp{*.c} can be replaced with any number of file names. +This is called @dfn{wildcard expansion}, and it is also a part of +word expansion. + +When you use @samp{echo $PATH} to print your path, you are taking +advantage of @dfn{variable substitution}, which is also part of word +expansion. + +Ordinary programs can perform word expansion just like the shell by +calling the library function @code{wordexp}. + +@menu +* Expansion Stages:: What word expansion does to a string. +* Calling Wordexp:: How to call @code{wordexp}. +* Flags for Wordexp:: Options you can enable in @code{wordexp}. +* Wordexp Example:: A sample program that does word expansion. +* Tilde Expansion:: Details of how tilde expansion works.
+* Variable Substitution:: Different types of variable substitution. +@end menu + +@node Expansion Stages +@subsection The Stages of Word Expansion + +When word expansion is applied to a sequence of words, it performs the +following transformations in the order shown here: + +@enumerate +@item +@cindex tilde expansion +@dfn{Tilde expansion}: Replacement of @samp{~foo} with the name of +the home directory of @samp{foo}. + +@item +Next, three different transformations are applied in the same step, +from left to right: + +@itemize @bullet +@item +@cindex variable substitution +@cindex substitution of variables and commands +@dfn{Variable substitution}: Environment variables are substituted for +references such as @samp{$foo}. + +@item +@cindex command substitution +@dfn{Command substitution}: Constructs such as @w{@samp{`cat foo`}} and +the equivalent @w{@samp{$(cat foo)}} are replaced with the output from +the inner command. + +@item +@cindex arithmetic expansion +@dfn{Arithmetic expansion}: Constructs such as @samp{$(($x-1))} are +replaced with the result of the arithmetic computation. +@end itemize + +@item +@cindex field splitting +@dfn{Field splitting}: subdivision of the text into @dfn{words}. + +@item +@cindex wildcard expansion +@dfn{Wildcard expansion}: The replacement of a construct such as @samp{*.c} +with a list of @samp{.c} file names. Wildcard expansion applies to an +entire word at a time, and replaces that word with 0 or more file names +that are themselves words. + +@item +@cindex quote removal +@cindex removal of quotes +@dfn{Quote removal}: The deletion of string-quotes, now that they have +done their job by inhibiting the above transformations when appropriate. +@end enumerate + +For the details of these transformations, and how to write the constructs +that use them, see @w{@cite{The BASH Manual}} (to appear). 
+ +@node Calling Wordexp +@subsection Calling @code{wordexp} + +All the functions, constants and data types for word expansion are +declared in the header file @file{wordexp.h}. + +Word expansion produces a vector of words (strings). To return this +vector, @code{wordexp} uses a special data type, @code{wordexp_t}, which +is a structure. You pass @code{wordexp} the address of the structure, +and it fills in the structure's fields to tell you about the results. + +@comment wordexp.h +@comment POSIX.2 +@deftp {Data Type} {wordexp_t} +This data type holds a pointer to a word vector. More precisely, it +records both the address of the word vector and its size. + +@table @code +@item we_wordc +The number of elements in the vector. + +@item we_wordv +The address of the vector. This field has type @w{@code{char **}}. + +@item we_offs +The offset of the first real element of the vector, from its nominal +address in the @code{we_wordv} field. Unlike the other fields, this +is always an input to @code{wordexp}, rather than an output from it. + +If you use a nonzero offset, then that many elements at the beginning of +the vector are left empty. (The @code{wordexp} function fills them with +null pointers.) + +The @code{we_offs} field is meaningful only if you use the +@code{WRDE_DOOFFS} flag. Otherwise, the offset is always zero +regardless of what is in this field, and the first real element comes at +the beginning of the vector. 
+@end table +@end deftp + +@comment wordexp.h +@comment POSIX.2 +@deftypefun int wordexp (const char *@var{words}, wordexp_t *@var{word-vector-ptr}, int @var{flags}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtasuconst{:@mtsenv{}} @mtsenv{} @mtascusig{:ALRM} @mtascutimer{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuintl{} @ascuheap{} @asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c wordexp @mtasurace:utent @mtasuconst:@mtsenv @mtsenv @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuintl @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsfd @acsmem +@c w_newword ok +@c wordfree dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c calloc dup @ascuheap @acsmem +@c getenv dup @mtsenv +@c strcpy dup ok +@c parse_backslash @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c parse_dollars @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c w_addchar dup @ascuheap @acsmem +@c parse_arith @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c w_newword dup ok +@c parse_dollars dup @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c parse_backtick dup @ascuplugin @ascuheap @aculock @acsfd @acsmem +@c parse_qtd_backslash dup @ascuheap @acsmem +@c eval_expr @mtslocale +@c eval_expr_multidiv @mtslocale +@c eval_expr_val @mtslocale +@c isspace dup @mtslocale +@c eval_expr dup @mtslocale +@c isspace dup @mtslocale +@c isspace dup @mtslocale +@c free dup @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c w_addstr dup @ascuheap @acsmem +@c itoa_word dup ok +@c parse_comm @ascuplugin @ascuheap @aculock @acsfd @acsmem +@c w_newword dup ok +@c pthread_setcancelstate @ascuplugin @ascuheap @acsmem +@c (disable cancellation around exec_comm; it may do_cancel the +@c 
second time, if async cancel is enabled) +@c THREAD_ATOMIC_CMPXCHG_VAL dup ok +@c CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS dup ok +@c do_cancel @ascuplugin @ascuheap @acsmem +@c THREAD_ATOMIC_BIT_SET dup ok +@c pthread_unwind @ascuplugin @ascuheap @acsmem +@c Unwind_ForcedUnwind if available @ascuplugin @ascuheap @acsmem +@c libc_unwind_longjmp otherwise +@c cleanups +@c exec_comm @ascuplugin @ascuheap @aculock @acsfd @acsmem +@c pipe2 dup ok +@c pipe dup ok +@c fork dup @ascuplugin @aculock +@c close dup @acsfd +@c on child: exec_comm_child -> exec or abort +@c waitpid dup ok +@c read dup ok +@c w_addmem dup @ascuheap @acsmem +@c strchr dup ok +@c w_addword dup @ascuheap @acsmem +@c w_newword dup ok +@c w_addchar dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c kill dup ok +@c free dup @ascuheap @acsmem +@c parse_param @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c reads from __libc_argc and __libc_argv without guards +@c w_newword dup ok +@c isalpha dup @mtslocale^^ +@c w_addchar dup @ascuheap @acsmem +@c isalnum dup @mtslocale^^ +@c isdigit dup @mtslocale^^ +@c strchr dup ok +@c itoa_word dup ok +@c atoi dup @mtslocale +@c getpid dup ok +@c w_addstr dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c stpcpy dup ok +@c w_addword dup @ascuheap @acsmem +@c strdup dup @ascuheap @acsmem +@c getenv dup @mtsenv +@c parse_dollars dup @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c parse_tilde dup @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c fnmatch dup @mtsenv @mtslocale @ascuheap @acsmem +@c mempcpy dup ok +@c _ dup @ascuintl +@c fxprintf dup @aculock +@c setenv dup @mtasuconst:@mtsenv @ascuheap @asulock @acucorrupt @aculock @acsmem +@c strspn dup ok +@c strcspn dup ok +@c 
parse_backtick @ascuplugin @ascuheap @aculock @acsfd @acsmem +@c w_newword dup ok +@c exec_comm dup @ascuplugin @ascuheap @aculock @acsfd @acsmem +@c free dup @ascuheap @acsmem +@c parse_qtd_backslash dup @ascuheap @acsmem +@c parse_backslash dup @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c parse_dquote @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c parse_dollars dup @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c parse_backtick dup @ascuplugin @ascuheap @aculock @acsfd @acsmem +@c parse_qtd_backslash dup @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c w_addword dup @ascuheap @acsmem +@c strdup dup @ascuheap @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c parse_squote dup @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c parse_tilde @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c strchr dup ok +@c w_addchar dup @ascuheap @acsmem +@c getenv dup @mtsenv +@c w_addstr dup @ascuheap @acsmem +@c strlen dup ok +@c w_addmem dup @ascuheap @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c mempcpy dup ok +@c getuid dup ok +@c getpwuid_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getpwnam_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c parse_glob @mtasurace:utent @mtasuconst:@mtsenv @mtsenv @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c strchr dup ok +@c parse_dollars dup @mtasuconst:@mtsenv @mtslocale @mtsenv @ascudlopen @ascuplugin @ascuintl @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c parse_qtd_backslash @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c 
parse_backslash dup @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c w_addword dup @ascuheap @acsmem +@c w_newword dup ok +@c do_parse_glob @mtasurace:utent @mtsenv @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @aculock @acsfd @acsmem +@c glob dup @mtasurace:utent @mtsenv @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @aculock @acsfd @acsmem [auto glob_t avoids @asucorrupt @acucorrupt] +@c w_addstr dup @ascuheap @acsmem +@c w_addchar dup @ascuheap @acsmem +@c globfree dup @ascuheap @acsmem [auto glob_t avoids @asucorrupt @acucorrupt] +@c free dup @ascuheap @acsmem +@c w_newword dup ok +@c strdup dup @ascuheap @acsmem +@c w_addword dup @ascuheap @acsmem +@c wordfree dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c strchr dup ok +@c w_addchar dup @ascuheap @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +Perform word expansion on the string @var{words}, putting the result in +a newly allocated vector, and store the size and address of this vector +into @code{*@var{word-vector-ptr}}. The argument @var{flags} is a +combination of bit flags; see @ref{Flags for Wordexp}, for details of +the flags. + +You shouldn't use any of the characters @samp{|&;<>} in the string +@var{words} unless they are quoted; likewise for newline. If you use +these characters unquoted, you will get the @code{WRDE_BADCHAR} error +code. Don't use parentheses or braces unless they are quoted or part of +a word expansion construct. If you use quotation characters @samp{'"`}, +they should come in pairs that balance. + +The results of word expansion are a sequence of words. The function +@code{wordexp} allocates a string for each resulting word, then +allocates a vector of type @code{char **} to store the addresses of +these strings. The last element of the vector is a null pointer. +This vector is called the @dfn{word vector}. 
+ +To return this vector, @code{wordexp} stores both its address and its +length (number of elements, not counting the terminating null pointer) +into @code{*@var{word-vector-ptr}}. + +If @code{wordexp} succeeds, it returns 0. Otherwise, it returns one +of these error codes: + +@vtable @code +@comment wordexp.h +@comment POSIX.2 +@item WRDE_BADCHAR +The input string @var{words} contains an unquoted invalid character such +as @samp{|}. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_BADVAL +The input string refers to an undefined shell variable, and you used the flag +@code{WRDE_UNDEF} to forbid such references. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_CMDSUB +The input string uses command substitution, and you used the flag +@code{WRDE_NOCMD} to forbid command substitution. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_NOSPACE +It was impossible to allocate memory to hold the result. In this case, +@code{wordexp} can store part of the results---as much as it could +allocate room for. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_SYNTAX +There was a syntax error in the input string. For example, an unmatched +quoting character is a syntax error. This error code is also used to +signal division by zero and overflow in arithmetic expansion. +@end vtable +@end deftypefun + +@comment wordexp.h +@comment POSIX.2 +@deftypefun void wordfree (wordexp_t *@var{word-vector-ptr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c wordfree dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c free dup @ascuheap @acsmem +Free the storage used for the word-strings and vector that +@code{*@var{word-vector-ptr}} points to. This does not free the +structure @code{*@var{word-vector-ptr}} itself---only the other +data it points to. 
+@end deftypefun + +@node Flags for Wordexp +@subsection Flags for Word Expansion + +This section describes the flags that you can specify in the +@var{flags} argument to @code{wordexp}. Choose the flags you want, +and combine them with the C operator @code{|}. + +@vtable @code +@comment wordexp.h +@comment POSIX.2 +@item WRDE_APPEND +Append the words from this expansion to the vector of words produced by +previous calls to @code{wordexp}. This way you can effectively expand +several words as if they were concatenated with spaces between them. + +In order for appending to work, you must not modify the contents of the +word vector structure between calls to @code{wordexp}. And, if you set +@code{WRDE_DOOFFS} in the first call to @code{wordexp}, you must also +set it when you append to the results. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_DOOFFS +Leave blank slots at the beginning of the vector of words. +The @code{we_offs} field says how many slots to leave. +The blank slots contain null pointers. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_NOCMD +Don't do command substitution; if the input requests command substitution, +report an error. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_REUSE +Reuse a word vector made by a previous call to @code{wordexp}. +Instead of allocating a new vector of words, this call to @code{wordexp} +will use the vector that already exists (making it larger if necessary). + +Note that the vector may move, so it is not safe to save an old pointer +and use it again after calling @code{wordexp}. You must fetch +@code{we_wordv} anew after each call. + +@comment wordexp.h +@comment POSIX.2 +@item WRDE_SHOWERR +Do show any error messages printed by commands run by command substitution. +More precisely, allow these commands to inherit the standard error output +stream of the current process. By default, @code{wordexp} gives these +commands a standard error stream that discards all output.
+ +@comment wordexp.h +@comment POSIX.2 +@item WRDE_UNDEF +If the input refers to a shell variable that is not defined, report an +error. +@end vtable + +@node Wordexp Example +@subsection @code{wordexp} Example + +Here is an example of using @code{wordexp} to expand several strings +and use the results to run a shell command. It also shows the use of +@code{WRDE_APPEND} to concatenate the expansions and of @code{wordfree} +to free the space allocated by @code{wordexp}. + +@smallexample +int +expand_and_execute (const char *program, const char **options) +@{ + wordexp_t result; + pid_t pid; + int status, i; + + /* @r{Expand the string for the program to run.} */ + switch (wordexp (program, &result, 0)) + @{ + case 0: /* @r{Successful}. */ + break; + case WRDE_NOSPACE: + /* @r{If the error was @code{WRDE_NOSPACE},} + @r{then perhaps part of the result was allocated.} */ + wordfree (&result); + default: /* @r{Some other error.} */ + return -1; + @} + + /* @r{Expand the strings specified for the arguments.} */ + for (i = 0; options[i] != NULL; i++) + @{ + if (wordexp (options[i], &result, WRDE_APPEND)) + @{ + wordfree (&result); + return -1; + @} + @} + + pid = fork (); + if (pid == 0) + @{ + /* @r{This is the child process. Execute the command.} */ + execv (result.we_wordv[0], result.we_wordv); + exit (EXIT_FAILURE); + @} + else if (pid < 0) + /* @r{The fork failed. Report failure.} */ + status = -1; + else + /* @r{This is the parent process. Wait for the child to complete.} */ + if (waitpid (pid, &status, 0) != pid) + status = -1; + + wordfree (&result); + return status; +@} +@end smallexample + +@node Tilde Expansion +@subsection Details of Tilde Expansion + +It's a standard part of shell syntax that you can use @samp{~} at the +beginning of a file name to stand for your own home directory. You +can use @samp{~@var{user}} to stand for @var{user}'s home directory.
+ +@dfn{Tilde expansion} is the process of converting these abbreviations +to the directory names that they stand for. + +Tilde expansion applies to the @samp{~} plus all following characters up +to whitespace or a slash. It takes place only at the beginning of a +word, and only if none of the characters to be transformed is quoted in +any way. + +Plain @samp{~} uses the value of the environment variable @code{HOME} +as the proper home directory name. @samp{~} followed by a user name +uses @code{getpwnam} to look up that user in the user database, and +uses whatever directory is recorded there. Thus, @samp{~} followed +by your own name can give different results from plain @samp{~}, if +the value of @code{HOME} is not really your home directory. + +@node Variable Substitution +@subsection Details of Variable Substitution + +Part of ordinary shell syntax is the use of @samp{$@var{variable}} to +substitute the value of a shell variable into a command. This is called +@dfn{variable substitution}, and it is one part of doing word expansion. + +There are two basic ways you can write a variable reference for +substitution: + +@table @code +@item $@{@var{variable}@} +If you write braces around the variable name, then it is completely +unambiguous where the variable name ends. You can concatenate +additional letters onto the end of the variable value by writing them +immediately after the close brace. For example, @samp{$@{foo@}s} +expands into @samp{tractors}. + +@item $@var{variable} +If you do not put braces around the variable name, then the variable +name consists of all the alphanumeric characters and underscores that +follow the @samp{$}. The next punctuation character ends the variable +name. Thus, @samp{$foo-bar} refers to the variable @code{foo} and expands +into @samp{tractor-bar}. +@end table + +When you use braces, you can also use various constructs to modify the +value that is substituted, or test it in various ways.
+ +@table @code +@item $@{@var{variable}:-@var{default}@} +Substitute the value of @var{variable}, but if that is empty or +undefined, use @var{default} instead. + +@item $@{@var{variable}:=@var{default}@} +Substitute the value of @var{variable}, but if that is empty or +undefined, use @var{default} instead and set the variable to +@var{default}. + +@item $@{@var{variable}:?@var{message}@} +If @var{variable} is defined and not empty, substitute its value. + +Otherwise, print @var{message} as an error message on the standard error +stream, and consider word expansion a failure. + +@c ??? How does wordexp report such an error? +@c WRDE_BADVAL is returned. + +@item $@{@var{variable}:+@var{replacement}@} +Substitute @var{replacement}, but only if @var{variable} is defined and +nonempty. Otherwise, substitute nothing for this construct. +@end table + +@table @code +@item $@{#@var{variable}@} +Substitute a numeral which expresses in base ten the number of +characters in the value of @var{variable}. @samp{$@{#foo@}} stands for +@samp{7}, because @samp{tractor} is seven characters. +@end table + +These variants of variable substitution let you remove part of the +variable's value before substituting it. The @var{prefix} and +@var{suffix} are not mere strings; they are wildcard patterns, just +like the patterns that you use to match multiple file names. But +in this context, they match against parts of the variable value +rather than against file names. + +@table @code +@item $@{@var{variable}%%@var{suffix}@} +Substitute the value of @var{variable}, but first discard from that +variable any portion at the end that matches the pattern @var{suffix}. + +If there is more than one alternative for how to match against +@var{suffix}, this construct uses the longest possible match. + +Thus, @samp{$@{foo%%r*@}} substitutes @samp{t}, because the largest +match for @samp{r*} at the end of @samp{tractor} is @samp{ractor}. 
+ +@item $@{@var{variable}%@var{suffix}@} +Substitute the value of @var{variable}, but first discard from that +variable any portion at the end that matches the pattern @var{suffix}. + +If there is more than one alternative for how to match against +@var{suffix}, this construct uses the shortest possible alternative. + +Thus, @samp{$@{foo%r*@}} substitutes @samp{tracto}, because the shortest +match for @samp{r*} at the end of @samp{tractor} is just @samp{r}. + +@item $@{@var{variable}##@var{prefix}@} +Substitute the value of @var{variable}, but first discard from that +variable any portion at the beginning that matches the pattern @var{prefix}. + +If there is more than one alternative for how to match against +@var{prefix}, this construct uses the longest possible match. + +Thus, @samp{$@{foo##*t@}} substitutes @samp{or}, because the largest +match for @samp{*t} at the beginning of @samp{tractor} is @samp{tract}. + +@item $@{@var{variable}#@var{prefix}@} +Substitute the value of @var{variable}, but first discard from that +variable any portion at the beginning that matches the pattern @var{prefix}. + +If there is more than one alternative for how to match against +@var{prefix}, this construct uses the shortest possible alternative. + +Thus, @samp{$@{foo#*t@}} substitutes @samp{ractor}, because the shortest +match for @samp{*t} at the beginning of @samp{tractor} is just @samp{t}. + +@end table diff --git a/REORG.TODO/manual/pipe.texi b/REORG.TODO/manual/pipe.texi new file mode 100644 index 0000000000..2d7e30e796 --- /dev/null +++ b/REORG.TODO/manual/pipe.texi @@ -0,0 +1,319 @@ +@node Pipes and FIFOs, Sockets, File System Interface, Top +@c %MENU% A simple interprocess communication mechanism +@chapter Pipes and FIFOs + +@cindex pipe +A @dfn{pipe} is a mechanism for interprocess communication; data written +to the pipe by one process can be read by another process. The data is +handled in a first-in, first-out (FIFO) order. 
The pipe has no name; it +is created for one use and both ends must be inherited from the single +process which created the pipe. + +@cindex FIFO special file +A @dfn{FIFO special file} is similar to a pipe, but instead of being an +anonymous, temporary connection, a FIFO has a name or names like any +other file. Processes open the FIFO by name in order to communicate +through it. + +A pipe or FIFO has to be open at both ends simultaneously. If you read +from a pipe or FIFO file that doesn't have any processes writing to it +(perhaps because they have all closed the file, or exited), the read +returns end-of-file. Writing to a pipe or FIFO that doesn't have a +reading process is treated as an error condition; it generates a +@code{SIGPIPE} signal, and fails with error code @code{EPIPE} if the +signal is handled or blocked. + +Neither pipes nor FIFO special files allow file positioning. Both +reading and writing operations happen sequentially; reading from the +beginning of the file and writing at the end. + +@menu +* Creating a Pipe:: Making a pipe with the @code{pipe} function. +* Pipe to a Subprocess:: Using a pipe to communicate with a + child process. +* FIFO Special Files:: Making a FIFO special file. +* Pipe Atomicity:: When pipe (or FIFO) I/O is atomic. +@end menu + +@node Creating a Pipe +@section Creating a Pipe +@cindex creating a pipe +@cindex opening a pipe +@cindex interprocess communication, with pipes + +The primitive for creating a pipe is the @code{pipe} function. This +creates both the reading and writing ends of the pipe. It is not very +useful for a single process to use a pipe to talk to itself. In typical +use, a process creates a pipe just before it forks one or more child +processes (@pxref{Creating a Process}). The pipe is then used for +communication either between the parent and child processes, or between +two sibling processes. + +The @code{pipe} function is declared in the header file +@file{unistd.h}.
+@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun int pipe (int @var{filedes}@t{[2]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +@c On Linux, syscall pipe2. On HURD, call socketpair. +The @code{pipe} function creates a pipe and puts the file descriptors +for the reading and writing ends of the pipe (respectively) into +@code{@var{filedes}[0]} and @code{@var{filedes}[1]}. + +An easy way to remember that the input end comes first is that file +descriptor @code{0} is standard input, and file descriptor @code{1} is +standard output. + +If successful, @code{pipe} returns a value of @code{0}. On failure, +@code{-1} is returned. The following @code{errno} error conditions are +defined for this function: + +@table @code +@item EMFILE +The process has too many files open. + +@item ENFILE +There are too many open files in the entire system. @xref{Error Codes}, +for more information about @code{ENFILE}. This error never occurs on +@gnuhurdsystems{}. +@end table +@end deftypefun + +Here is an example of a simple program that creates a pipe. This program +uses the @code{fork} function (@pxref{Creating a Process}) to create +a child process. The parent process writes data to the pipe, which is +read by the child process. + +@smallexample +@include pipe.c.texi +@end smallexample + +@node Pipe to a Subprocess +@section Pipe to a Subprocess +@cindex creating a pipe to a subprocess +@cindex pipe to a subprocess +@cindex filtering i/o through subprocess + +A common use of pipes is to send data to or receive data from a program +being run as a subprocess. One way of doing this is by using a combination of +@code{pipe} (to create the pipe), @code{fork} (to create the subprocess), +@code{dup2} (to force the subprocess to use the pipe as its standard input +or output channel), and @code{exec} (to execute the new program). Or, +you can use @code{popen} and @code{pclose}. 
+ +The advantage of using @code{popen} and @code{pclose} is that the +interface is much simpler and easier to use. But it doesn't offer as +much flexibility as using the low-level functions directly. + +@comment stdio.h +@comment POSIX.2, SVID, BSD +@deftypefun {FILE *} popen (const char *@var{command}, const char *@var{mode}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c popen @ascuheap @asucorrupt @acucorrupt @aculock @acsfd @acsmem +@c malloc dup @ascuheap @acsmem +@c _IO_init ok +@c _IO_no_init ok +@c _IO_old_init ok +@c _IO_lock_init ok +@c _IO_new_file_init @asucorrupt @acucorrupt @aculock @acsfd +@c _IO_link_in @asucorrupt @acucorrupt @aculock @acsfd +@c the linked list is guarded by a recursive lock; +@c it may get corrupted with async signals and cancellation +@c _IO_lock_lock dup @aculock +@c _IO_flockfile dup @aculock +@c _IO_funlockfile dup @aculock +@c _IO_lock_unlock dup @aculock +@c _IO_new_proc_open @asucorrupt @acucorrupt @aculock @acsfd +@c the linked list is guarded by a recursive lock; + @c it may get corrupted with async signals and cancellation +@c _IO_file_is_open ok +@c pipe2 dup @acsfd +@c pipe dup @acsfd +@c _IO_fork=fork @aculock +@c _IO_close=close_not_cancel dup @acsfd +@c fcntl dup ok +@c _IO_lock_lock @aculock +@c _IO_lock_unlock @aculock +@c _IO_mask_flags ok [no @mtasurace:stream, nearly but sufficiently exclusive access] +@c _IO_un_link @asucorrupt @acucorrupt @aculock @acsfd +@c the linked list is guarded by a recursive lock; +@c it may get corrupted with async signals and cancellation +@c _IO_lock_lock dup @aculock +@c _IO_flockfile dup @aculock +@c _IO_funlockfile dup @aculock +@c _IO_lock_unlock dup @aculock +@c free dup @ascuheap @acsmem +The @code{popen} function is closely related to the @code{system} +function; see @ref{Running a Command}. It executes the shell command +@var{command} as a subprocess. 
However, instead of waiting for the +command to complete, it creates a pipe to the subprocess and returns a +stream that corresponds to that pipe. + +If you specify a @var{mode} argument of @code{"r"}, you can read from the +stream to retrieve data from the standard output channel of the subprocess. +The subprocess inherits its standard input channel from the parent process. + +Similarly, if you specify a @var{mode} argument of @code{"w"}, you can +write to the stream to send data to the standard input channel of the +subprocess. The subprocess inherits its standard output channel from +the parent process. + +In the event of an error @code{popen} returns a null pointer. This +might happen if the pipe or stream cannot be created, if the subprocess +cannot be forked, or if the program cannot be executed. +@end deftypefun + +@comment stdio.h +@comment POSIX.2, SVID, BSD +@deftypefun int pclose (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @ascuplugin{} @asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c Although the stream cannot be used after the call, even in case of +@c async cancellation, because the stream must not be used after pclose +@c is called, other stdio linked lists and their locks may be left in +@c corrupt states; that's where the corrupt and lock annotations come +@c from. 
+@c +@c pclose @ascuheap @ascuplugin @asucorrupt @asulock @acucorrupt @aculock @acsfd @acsmem +@c _IO_new_fclose @ascuheap @ascuplugin @asucorrupt @asulock @acucorrupt @aculock @acsfd @acsmem +@c _IO_un_link dup @asucorrupt @acucorrupt @aculock @acsfd +@c _IO_acquire_lock dup @aculock +@c _IO_flockfile dup @aculock +@c _IO_file_close_it @ascuheap @ascuplugin @asucorrupt @aculock @acucorrupt @acsfd @acsmem +@c _IO_file_is_open dup ok +@c _IO_do_flush @asucorrupt @ascuplugin @acucorrupt +@c _IO_do_write @asucorrupt @acucorrupt +@c new_do_write @asucorrupt @acucorrupt +@c _IO_SYSSEEK ok +@c lseek64 dup ok +@c _IO_SYSWRITE ok +@c write_not_cancel dup ok +@c write dup ok +@c _IO_adjust_column ok +@c _IO_setg dup @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_wdo_write @asucorrupt @ascuplugin @acucorrupt +@c _IO_new_do_write=_IO_do_write dup @asucorrupt @acucorrupt +@c *cc->__codecvt_do_out @ascuplugin +@c _IO_wsetg dup @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_unsave_markers @ascuheap @asucorrupt @acucorrupt @acsmem +@c _IO_have_backup dup ok +@c _IO_free_backup_area dup @ascuheap @asucorrupt @acucorrupt @acsmem +@c _IO_SYSCLOSE @aculock @acucorrupt @acsfd +@c _IO_lock_lock dup @aculock +@c _IO_close=close_not_cancel dup @acsfd +@c _IO_lock_unlock dup @aculock +@c _IO_waitpid=waitpid_not_cancel dup ok +@c _IO_have_wbackup ok +@c _IO_free_wbackup_area @ascuheap @asucorrupt @acucorrupt @acsmem +@c _IO_in_backup dup ok +@c _IO_switch_to_main_wget_area @asucorrupt @acucorrupt +@c free dup @ascuheap @acsmem +@c _IO_wsetb @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_wsetg @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_wsetp @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_setb @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_setg @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_setp @asucorrupt @acucorrupt [no @mtasurace:stream, locked] +@c _IO_un_link dup 
@asucorrupt @acucorrupt @aculock @acsfd +@c _IO_release_lock dup @aculock +@c _IO_funlockfile dup @aculock +@c _IO_FINISH @ascuheap @ascuplugin @asucorrupt @acucorrupt @aculock @acsfd @acsmem +@c _IO_new_file_finish @ascuheap @ascuplugin @asucorrupt @acucorrupt @aculock @acsfd @acsmem +@c _IO_file_is_open dup ok +@c _IO_do_flush dup @ascuplugin @asucorrupt @acucorrupt +@c _IO_SYSCLOSE dup @aculock @acucorrupt @acsfd +@c _IO_default_finish @ascuheap @asucorrupt @acucorrupt @aculock @acsfd @acsmem +@c FREE_BUF @acsmem +@c munmap dup @acsmem +@c free dup @ascuheap @acsmem +@c _IO_un_link dup @asucorrupt @acucorrupt @aculock @acsfd +@c _IO_lock_fini ok +@c libc_lock_fini_recursive ok +@c libc_lock_lock dup @asulock @aculock +@c gconv_release_step ok +@c libc_lock_unlock dup @asulock @aculock +@c _IO_have_backup ok +@c _IO_free_backup_area @ascuheap @asucorrupt @acucorrupt @acsmem +@c _IO_in_backup ok +@c _IO_switch_to_main_get_area @asucorrupt @acucorrupt +@c free dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +The @code{pclose} function is used to close a stream created by @code{popen}. +It waits for the child process to terminate and returns its status value, +as for the @code{system} function. +@end deftypefun + +Here is an example showing how to use @code{popen} and @code{pclose} to +filter output through another program, in this case the paging program +@code{more}. + +@smallexample +@include popen.c.texi +@end smallexample + +@node FIFO Special Files +@section FIFO Special Files +@cindex creating a FIFO special file +@cindex interprocess communication, with FIFO + +A FIFO special file is similar to a pipe, except that it is created in a +different way. Instead of being an anonymous communications channel, a +FIFO special file is entered into the file system by calling +@code{mkfifo}. + +Once you have created a FIFO special file in this way, any process can +open it for reading or writing, in the same way as an ordinary file. 
+However, it has to be open at both ends simultaneously before you can +proceed to do any input or output operations on it. Opening a FIFO for +reading normally blocks until some other process opens the same FIFO for +writing, and vice versa. + +The @code{mkfifo} function is declared in the header file +@file{sys/stat.h}. +@pindex sys/stat.h + +@comment sys/stat.h +@comment POSIX.1 +@deftypefun int mkfifo (const char *@var{filename}, mode_t @var{mode}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On generic Posix, calls xmknod. +The @code{mkfifo} function makes a FIFO special file with name +@var{filename}. The @var{mode} argument is used to set the file's +permissions; see @ref{Setting Permissions}. + +The normal, successful return value from @code{mkfifo} is @code{0}. In +the case of an error, @code{-1} is returned. In addition to the usual +file name errors (@pxref{File Name Errors}), the following +@code{errno} error conditions are defined for this function: + +@table @code +@item EEXIST +The named file already exists. + +@item ENOSPC +The directory or file system cannot be extended. + +@item EROFS +The directory that would contain the file resides on a read-only file +system. +@end table +@end deftypefun + +@node Pipe Atomicity +@section Atomicity of Pipe I/O + +Reading or writing pipe data is @dfn{atomic} if the size of data written +is not greater than @code{PIPE_BUF}. This means that the data transfer +seems to be an instantaneous unit, in that nothing else in the system +can observe a state in which it is partially complete. Atomic I/O may +not begin right away (it may need to wait for buffer space or for data), +but once it does begin it finishes immediately. + +Reading or writing a larger amount of data may not be atomic; for +example, output data from other processes sharing the descriptor may be +interspersed. Also, once @code{PIPE_BUF} characters have been written, +further writes will block until some characters are read. 
+ +@xref{Limits for Files}, for information about the @code{PIPE_BUF} +parameter. diff --git a/REORG.TODO/manual/platform.texi b/REORG.TODO/manual/platform.texi new file mode 100644 index 0000000000..cb166641fb --- /dev/null +++ b/REORG.TODO/manual/platform.texi @@ -0,0 +1,117 @@ +@node Platform, Contributors, Maintenance, Top +@c %MENU% Describe all platform-specific facilities provided +@appendix Platform-specific facilities + +@Theglibc{} can provide machine-specific functionality. + +@menu +* PowerPC:: Facilities Specific to the PowerPC Architecture +@end menu + +@node PowerPC +@appendixsec PowerPC-specific Facilities + +Facilities specific to PowerPC that are not specific to a particular +operating system are declared in @file{sys/platform/ppc.h}. + +@deftypefun {uint64_t} __ppc_get_timebase (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Read the current value of the Time Base Register. + +The @dfn{Time Base Register} is a 64-bit register that stores a monotonically +incremented value updated at a system-dependent frequency that may be +different from the processor frequency. More information is available in +@cite{Power ISA 2.06b - Book II - Section 5.2}. + +@code{__ppc_get_timebase} uses the processor's time base facility directly +without requiring assistance from the operating system, so it is very +efficient. +@end deftypefun + +@deftypefun {uint64_t} __ppc_get_timebase_freq (void) +@safety{@prelim{}@mtunsafe{@mtuinit{}}@asunsafe{@asucorrupt{:init}}@acunsafe{@acucorrupt{:init}}} +@c __ppc_get_timebase_freq=__get_timebase_freq @mtuinit @acsfd +@c __get_clockfreq @mtuinit @asucorrupt:init @acucorrupt:init @acsfd +@c the initialization of the static timebase_freq is not exactly +@c safe, because hp_timing_t cannot be atomically set up. +@c syscall:get_tbfreq ok +@c open dup @acsfd +@c read dup ok +@c memcpy dup ok +@c memmem dup ok +@c close dup @acsfd +Read the current frequency at which the Time Base Register is updated. 
+ +This frequency is not related to the processor clock or the bus clock. +It is also possible that this frequency is not constant. More information is +available in @cite{Power ISA 2.06b - Book II - Section 5.2}. +@end deftypefun + +The following functions provide hints about the usage of resources that are +shared with other processors. They can be used, for example, if a program +waiting on a lock intends to divert the shared resources to be used by other +processors. More information is available in @cite{Power ISA 2.06b - Book II - +Section 3.2}. + +@deftypefun {void} __ppc_yield (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Provide a hint that performance will probably be improved if shared resources +dedicated to the executing processor are released for use by other processors. +@end deftypefun + +@deftypefun {void} __ppc_mdoio (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Provide a hint that performance will probably be improved if shared resources +dedicated to the executing processor are released until all outstanding storage +accesses to caching-inhibited storage have been completed. +@end deftypefun + +@deftypefun {void} __ppc_mdoom (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Provide a hint that performance will probably be improved if shared resources +dedicated to the executing processor are released until all outstanding storage +accesses to cacheable storage for which the data is not in the cache have been +completed. +@end deftypefun + +@deftypefun {void} __ppc_set_ppr_med (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the Program Priority Register to medium value (default). + +The @dfn{Program Priority Register} (PPR) is a 64-bit register that controls +the program's priority. By adjusting the PPR value the programmer may +improve system throughput by causing the system resources to be used +more efficiently, especially in contention situations. 
+The three unprivileged states available are covered by the functions +@code{__ppc_set_ppr_med} (medium -- default), @code{__ppc_set_ppr_low} (low) +and @code{__ppc_set_ppr_med_low} (medium low). More information is +available in @cite{Power ISA 2.06b - Book II - Section 3.1}. +@end deftypefun + +@deftypefun {void} __ppc_set_ppr_low (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the Program Priority Register to low value. +@end deftypefun + +@deftypefun {void} __ppc_set_ppr_med_low (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the Program Priority Register to medium low value. +@end deftypefun + +Power ISA 2.07 extends the priorities that can be set to the Program Priority +Register (PPR). The following functions implement the new priority levels: +very low and medium high. + +@deftypefun {void} __ppc_set_ppr_very_low (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the Program Priority Register to very low value. +@end deftypefun + +@deftypefun {void} __ppc_set_ppr_med_high (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Set the Program Priority Register to medium high value. The medium high +priority is privileged and may only be set during certain time intervals by +problem-state programs. If the program priority is medium high when the time +interval expires or if an attempt is made to set the priority to medium high +when it is not allowed, the priority is set to medium. +@end deftypefun diff --git a/REORG.TODO/manual/probes.texi b/REORG.TODO/manual/probes.texi new file mode 100644 index 0000000000..eb91c62703 --- /dev/null +++ b/REORG.TODO/manual/probes.texi @@ -0,0 +1,416 @@ +@node Internal Probes +@c @node Internal Probes, Tunables, POSIX Threads, Top +@c %MENU% Probes to monitor libc internal behavior +@chapter Internal probes + +In order to aid in debugging and monitoring internal behavior, +@theglibc{} exposes nearly-zero-overhead SystemTap probes marked with +the @code{libc} provider.
+ +These probes are not part of the @glibcadj{} stable ABI, and they are +subject to change or removal across releases. Our only promise with +regard to them is that, if we find a need to remove or modify the +arguments of a probe, the modified probe will have a different name, so +that program monitors relying on the old probe will not get unexpected +arguments. + +@menu +* Memory Allocation Probes:: Probes in the memory allocation subsystem +* Mathematical Function Probes:: Probes in mathematical functions +* Non-local Goto Probes:: Probes in setjmp and longjmp +@end menu + +@node Memory Allocation Probes +@section Memory Allocation Probes + +These probes are designed to signal relatively unusual situations within +the virtual memory subsystem of @theglibc{}. + +@deftp Probe memory_sbrk_more (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered after the main arena is extended by calling +@code{sbrk}. Argument @var{$arg1} is the additional size requested to +@code{sbrk}, and @var{$arg2} is the pointer that marks the end of the +@code{sbrk} area, returned in response to the request. +@end deftp + +@deftp Probe memory_sbrk_less (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered after the size of the main arena is decreased by +calling @code{sbrk}. Argument @var{$arg1} is the size released by +@code{sbrk} (the positive value, rather than the negative value passed +to @code{sbrk}), and @var{$arg2} is the pointer that marks the end of +the @code{sbrk} area, returned in response to the request. +@end deftp + +@deftp Probe memory_heap_new (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered after a new heap is @code{mmap}ed. Argument +@var{$arg1} is a pointer to the base of the memory area, where the +@code{heap_info} data structure is held, and @var{$arg2} is the size of +the heap. 
+@end deftp + +@deftp Probe memory_heap_free (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered @emph{before} (unlike the other sbrk and heap +probes) a heap is completely removed via @code{munmap}. Argument +@var{$arg1} is a pointer to the heap, and @var{$arg2} is the size of the +heap. +@end deftp + +@deftp Probe memory_heap_more (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered after a trailing portion of an @code{mmap}ed +heap is extended. Argument @var{$arg1} is a pointer to the heap, and +@var{$arg2} is the new size of the heap. +@end deftp + +@deftp Probe memory_heap_less (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered after a trailing portion of an @code{mmap}ed +heap is released. Argument @var{$arg1} is a pointer to the heap, and +@var{$arg2} is the new size of the heap. +@end deftp + +@deftp Probe memory_malloc_retry (size_t @var{$arg1}) +@deftpx Probe memory_realloc_retry (size_t @var{$arg1}, void *@var{$arg2}) +@deftpx Probe memory_memalign_retry (size_t @var{$arg1}, size_t @var{$arg2}) +@deftpx Probe memory_calloc_retry (size_t @var{$arg1}) +These probes are triggered when the corresponding functions fail to +obtain the requested amount of memory from the arena in use, before they +call @code{arena_get_retry} to select an alternate arena in which to +retry the allocation. Argument @var{$arg1} is the amount of memory +requested by the user; in the @code{calloc} case, that is the total size +computed from both function arguments. In the @code{realloc} case, +@var{$arg2} is the pointer to the memory area being resized. In the +@code{memalign} case, @var{$arg2} is the alignment to be used for the +request, which may be stricter than the value passed to the +@code{memalign} function. A @code{memalign} probe is also used by functions +@code{posix_memalign}, @code{valloc} and @code{pvalloc}.
+ +Note that the argument order does @emph{not} match that of the +corresponding two-argument functions, so that in all of these probes the +user-requested allocation size is in @var{$arg1}. +@end deftp + +@deftp Probe memory_arena_retry (size_t @var{$arg1}, void *@var{$arg2}) +This probe is triggered within @code{arena_get_retry} (the function +called to select the alternate arena in which to retry an allocation +that failed on the first attempt), before the selection of an alternate +arena. This probe is redundant, but much easier to use when it's not +important to determine which of the various memory allocation functions +is failing to allocate on the first try. Argument @var{$arg1} is the +same as in the function-specific probes, except for extra room for +padding introduced by functions that have to ensure stricter alignment. +Argument @var{$arg2} is the arena in which allocation failed. +@end deftp + +@deftp Probe memory_arena_new (void *@var{$arg1}, size_t @var{$arg2}) +This probe is triggered when @code{malloc} allocates and initializes an +additional arena (not the main arena), but before the arena is assigned +to the running thread or inserted into the internal linked list of +arenas. The arena's @code{malloc_state} internal data structure is +located at @var{$arg1}, within a newly-allocated heap big enough to hold +at least @var{$arg2} bytes. +@end deftp + +@deftp Probe memory_arena_reuse (void *@var{$arg1}, void *@var{$arg2}) +This probe is triggered when @code{malloc} has just selected an existing +arena to reuse, and (temporarily) reserved it for exclusive use. +Argument @var{$arg1} is a pointer to the newly-selected arena, and +@var{$arg2} is a pointer to the arena previously used by that thread. + +This occurs within +@code{reused_arena}, right after the mutex mentioned in probe +@code{memory_arena_reuse_wait} is acquired; argument @var{$arg1} will +point to the same arena. In this configuration, this will usually only +occur once per thread. 
The exception is when a thread first selected +the main arena, but a subsequent allocation from it fails: then, and +only then, may we switch to another arena to retry that allocation, and +for further allocations within that thread. +@end deftp + +@deftp Probe memory_arena_reuse_wait (void *@var{$arg1}, void *@var{$arg2}, void *@var{$arg3}) +This probe is triggered when @code{malloc} is about to wait for an arena +to become available for reuse. Argument @var{$arg1} holds a pointer to +the mutex the thread is going to wait on, @var{$arg2} is a pointer to a +newly-chosen arena to be reused, and @var{$arg3} is a pointer to the +arena previously used by that thread. + +This occurs within +@code{reused_arena}, when a thread first tries to allocate memory or +needs a retry after a failure to allocate from the main arena, there +isn't any free arena, the maximum number of arenas has been reached, and +an existing arena was chosen for reuse, but its mutex could not be +immediately acquired. The mutex in @var{$arg1} is the mutex of the +selected arena. +@end deftp + +@deftp Probe memory_arena_reuse_free_list (void *@var{$arg1}) +This probe is triggered when @code{malloc} has chosen an arena that is +in the free list for use by a thread, within the @code{get_free_list} +function. The argument @var{$arg1} holds a pointer to the selected arena. +@end deftp + +@deftp Probe memory_mallopt (int @var{$arg1}, int @var{$arg2}) +This probe is triggered when function @code{mallopt} is called to change +@code{malloc} internal configuration parameters, before any change to +the parameters is made. The arguments @var{$arg1} and @var{$arg2} are +the ones passed to the @code{mallopt} function. +@end deftp + +@deftp Probe memory_mallopt_mxfast (int @var{$arg1}, int @var{$arg2}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_MXFAST}, and the requested +value is in an acceptable range. 
Argument @var{$arg1} is the requested +value, and @var{$arg2} is the previous value of this @code{malloc} +parameter. +@end deftp + +@deftp Probe memory_mallopt_trim_threshold (int @var{$arg1}, int @var{$arg2}, int @var{$arg3}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_TRIM_THRESHOLD}. Argument +@var{$arg1} is the requested value, @var{$arg2} is the previous value of +this @code{malloc} parameter, and @var{$arg3} is nonzero if dynamic +threshold adjustment was already disabled. +@end deftp + +@deftp Probe memory_mallopt_top_pad (int @var{$arg1}, int @var{$arg2}, int @var{$arg3}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_TOP_PAD}. Argument +@var{$arg1} is the requested value, @var{$arg2} is the previous value of +this @code{malloc} parameter, and @var{$arg3} is nonzero if dynamic +threshold adjustment was already disabled. +@end deftp + +@deftp Probe memory_mallopt_mmap_threshold (int @var{$arg1}, int @var{$arg2}, int @var{$arg3}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_MMAP_THRESHOLD}, and the +requested value is in an acceptable range. Argument @var{$arg1} is the +requested value, @var{$arg2} is the previous value of this @code{malloc} +parameter, and @var{$arg3} is nonzero if dynamic threshold adjustment +was already disabled. +@end deftp + +@deftp Probe memory_mallopt_mmap_max (int @var{$arg1}, int @var{$arg2}, int @var{$arg3}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_MMAP_MAX}. Argument +@var{$arg1} is the requested value, @var{$arg2} is the previous value of +this @code{malloc} parameter, and @var{$arg3} is nonzero if dynamic +threshold adjustment was already disabled. 
+@end deftp + +@deftp Probe memory_mallopt_check_action (int @var{$arg1}, int @var{$arg2}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_CHECK_ACTION}. Argument +@var{$arg1} is the requested value, and @var{$arg2} is the previous +value of this @code{malloc} parameter. +@end deftp + +@deftp Probe memory_mallopt_perturb (int @var{$arg1}, int @var{$arg2}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_PERTURB}. Argument +@var{$arg1} is the requested value, and @var{$arg2} is the previous +value of this @code{malloc} parameter. +@end deftp + +@deftp Probe memory_mallopt_arena_test (int @var{$arg1}, int @var{$arg2}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_ARENA_TEST}, and the +requested value is in an acceptable range. Argument @var{$arg1} is the +requested value, and @var{$arg2} is the previous value of this +@code{malloc} parameter. +@end deftp + +@deftp Probe memory_mallopt_arena_max (int @var{$arg1}, int @var{$arg2}) +This probe is triggered shortly after the @code{memory_mallopt} probe, +when the parameter to be changed is @code{M_ARENA_MAX}, and the +requested value is in an acceptable range. Argument @var{$arg1} is the +requested value, and @var{$arg2} is the previous value of this +@code{malloc} parameter. +@end deftp + +@deftp Probe memory_mallopt_free_dyn_thresholds (int @var{$arg1}, int @var{$arg2}) +This probe is triggered when function @code{free} decides to adjust the +dynamic brk/mmap thresholds. Arguments @var{$arg1} and @var{$arg2} are +the adjusted mmap and trim thresholds, respectively. +@end deftp + +@node Mathematical Function Probes +@section Mathematical Function Probes + +Some mathematical functions fall back to multiple precision arithmetic for +some inputs to get last bit precision for their return values.
This multiple +precision fallback is much slower than the default algorithms and may have a +significant impact on application performance. The SystemTap probe markers +described in this section may help you determine if your application calls +mathematical functions with inputs that may result in multiple-precision +arithmetic. + +Unless explicitly mentioned otherwise, a precision of 1 implies 24 bits of +precision in the mantissa of the multiple precision number. Hence, a precision +level of 32 implies 768 bits of precision in the mantissa. + +@deftp Probe slowexp_p6 (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{exp} function is called with an +input that results in multiple precision computation with precision +6. Argument @var{$arg1} is the input value and @var{$arg2} is the +computed output. +@end deftp + +@deftp Probe slowexp_p32 (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{exp} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input value and @var{$arg2} is the +computed output. +@end deftp + +@deftp Probe slowpow_p10 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) +This probe is triggered when the @code{pow} function is called with +inputs that result in multiple precision computation with precision +10. Arguments @var{$arg1} and @var{$arg2} are the input values, +@var{$arg3} is the value computed in the fast phase of the algorithm +and @var{$arg4} is the final accurate value. +@end deftp + +@deftp Probe slowpow_p32 (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) +This probe is triggered when the @code{pow} function is called with +inputs that result in multiple precision computation with precision +32.
Arguments @var{$arg1} and @var{$arg2} are the input values, +@var{$arg3} is the value computed in the fast phase of the algorithm +and @var{$arg4} is the final accurate value. +@end deftp + +@deftp Probe slowlog (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}) +This probe is triggered when the @code{log} function is called with an +input that results in multiple precision computation. Argument +@var{$arg1} is the precision with which the computation succeeded. +Argument @var{$arg2} is the input and @var{$arg3} is the computed +output. +@end deftp + +@deftp Probe slowlog_inexact (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}) +This probe is triggered when the @code{log} function is called with an +input that results in multiple precision computation and none of the +multiple precision computations result in an accurate result. +Argument @var{$arg1} is the maximum precision with which computations +were performed. Argument @var{$arg2} is the input and @var{$arg3} is +the computed output. +@end deftp + +@deftp Probe slowatan2 (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) +This probe is triggered when the @code{atan2} function is called with +an input that results in multiple precision computation. Argument +@var{$arg1} is the precision with which computation succeeded. +Arguments @var{$arg2} and @var{$arg3} are inputs to the @code{atan2} +function and @var{$arg4} is the computed result. +@end deftp + +@deftp Probe slowatan2_inexact (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}, double @var{$arg4}) +This probe is triggered when the @code{atan2} function is called with +an input that results in multiple precision computation and none of +the multiple precision computations result in an accurate result. +Argument @var{$arg1} is the maximum precision with which computations +were performed. Arguments @var{$arg2} and @var{$arg3} are inputs to +the @code{atan2} function and @var{$arg4} is the computed result.
+@end deftp + +@deftp Probe slowatan (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}) +This probe is triggered when the @code{atan} function is called with +an input that results in multiple precision computation. Argument +@var{$arg1} is the precision with which computation succeeded. +Argument @var{$arg2} is the input to the @code{atan} function and +@var{$arg3} is the computed result. +@end deftp + +@deftp Probe slowatan_inexact (int @var{$arg1}, double @var{$arg2}, double @var{$arg3}) +This probe is triggered when the @code{atan} function is called with +an input that results in multiple precision computation and none of +the multiple precision computations result in an accurate result. +Argument @var{$arg1} is the maximum precision with which computations +were performed. Argument @var{$arg2} is the input to the @code{atan} +function and @var{$arg3} is the computed result. +@end deftp + +@deftp Probe slowtan (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{tan} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function and @var{$arg2} +is the computed result. +@end deftp + +@deftp Probe slowasin (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{asin} function is called with +an input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function and @var{$arg2} +is the computed result. +@end deftp + +@deftp Probe slowacos (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{acos} function is called with +an input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function and @var{$arg2} +is the computed result. 
+@end deftp + +@deftp Probe slowsin (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{sin} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function and @var{$arg2} +is the computed result. +@end deftp + +@deftp Probe slowcos (double @var{$arg1}, double @var{$arg2}) +This probe is triggered when the @code{cos} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function and @var{$arg2} +is the computed result. +@end deftp + +@deftp Probe slowsin_dx (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}) +This probe is triggered when the @code{sin} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function, @var{$arg2} is +the error bound of @var{$arg1} and @var{$arg3} is the computed result. +@end deftp + +@deftp Probe slowcos_dx (double @var{$arg1}, double @var{$arg2}, double @var{$arg3}) +This probe is triggered when the @code{cos} function is called with an +input that results in multiple precision computation with precision +32. Argument @var{$arg1} is the input to the function, @var{$arg2} is +the error bound of @var{$arg1} and @var{$arg3} is the computed result. +@end deftp + +@node Non-local Goto Probes +@section Non-local Goto Probes + +These probes are used to signal calls to @code{setjmp}, @code{sigsetjmp}, +@code{longjmp} or @code{siglongjmp}. + +@deftp Probe setjmp (void *@var{$arg1}, int @var{$arg2}, void *@var{$arg3}) +This probe is triggered whenever @code{setjmp} or @code{sigsetjmp} is +called. 
Argument @var{$arg1} is a pointer to the @code{jmp_buf} +passed as the first argument of @code{setjmp} or @code{sigsetjmp}, +@var{$arg2} is the second argument of @code{sigsetjmp} or zero if this +is a call to @code{setjmp} and @var{$arg3} is a pointer to the return +address that will be stored in the @code{jmp_buf}. +@end deftp + +@deftp Probe longjmp (void *@var{$arg1}, int @var{$arg2}, void *@var{$arg3}) +This probe is triggered whenever @code{longjmp} or @code{siglongjmp} +is called. Argument @var{$arg1} is a pointer to the @code{jmp_buf} +passed as the first argument of @code{longjmp} or @code{siglongjmp}, +@var{$arg2} is the return value passed as the second argument of +@code{longjmp} or @code{siglongjmp} and @var{$arg3} is a pointer to +the return address @code{longjmp} or @code{siglongjmp} will return to. + +The @code{longjmp} probe is triggered at a point where the registers +have not yet been restored to the values in the @code{jmp_buf} and +unwinding will show a call stack including the caller of +@code{longjmp} or @code{siglongjmp}. +@end deftp + +@deftp Probe longjmp_target (void *@var{$arg1}, int @var{$arg2}, void *@var{$arg3}) +This probe is triggered under the same conditions and with the same +arguments as the @code{longjmp} probe. + +The @code{longjmp_target} probe is triggered at a point where the +registers have been restored to the values in the @code{jmp_buf} and +unwinding will show a call stack including the caller of @code{setjmp} +or @code{sigsetjmp}. +@end deftp diff --git a/REORG.TODO/manual/process.texi b/REORG.TODO/manual/process.texi new file mode 100644 index 0000000000..085fdec926 --- /dev/null +++ b/REORG.TODO/manual/process.texi @@ -0,0 +1,851 @@ +@node Processes, Inter-Process Communication, Program Basics, Top +@c %MENU% How to create processes and run other programs +@chapter Processes + +@cindex process +@dfn{Processes} are the primitive units for allocation of system +resources. 
Each process has its own address space and (usually) one +thread of control. A process executes a program; you can have multiple +processes executing the same program, but each process has its own copy +of the program within its own address space and executes it +independently of the other copies. + +@cindex child process +@cindex parent process +Processes are organized hierarchically. Each process has a @dfn{parent +process} which explicitly arranged to create it. The processes created +by a given parent are called its @dfn{child processes}. A child +inherits many of its attributes from the parent process. + +This chapter describes how a program can create, terminate, and control +child processes. Actually, there are three distinct operations +involved: creating a new child process, causing the new process to +execute a program, and coordinating the completion of the child process +with the original program. + +The @code{system} function provides a simple, portable mechanism for +running another program; it does all three steps automatically. If you +need more control over the details of how this is done, you can use the +primitive functions to do each step individually instead. + +@menu +* Running a Command:: The easy way to run another program. +* Process Creation Concepts:: An overview of the hard way to do it. +* Process Identification:: How to get the process ID of a process. +* Creating a Process:: How to fork a child process. +* Executing a File:: How to make a process execute another program. +* Process Completion:: How to tell when a child process has completed. +* Process Completion Status:: How to interpret the status value + returned from a child process. +* BSD Wait Functions:: More functions, for backward compatibility. +* Process Creation Example:: A complete example program. +@end menu + + +@node Running a Command +@section Running a Command +@cindex running a command + +The easy way to run another program is to use the @code{system} +function. 
This function does all the work of running a subprogram, but +it doesn't give you much control over the details: you have to wait +until the subprogram terminates before you can do anything else. + +@comment stdlib.h +@comment ISO +@deftypefun int system (const char *@var{command}) +@pindex sh +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}} +@c system @ascuplugin @ascuheap @asulock @aculock @acsmem +@c do_system @ascuplugin @ascuheap @asulock @aculock @acsmem +@c sigemptyset dup ok +@c libc_lock_lock @asulock @aculock +@c ADD_REF ok +@c sigaction dup ok +@c SUB_REF ok +@c libc_lock_unlock @aculock +@c sigaddset dup ok +@c sigprocmask dup ok +@c CLEANUP_HANDLER @ascuplugin @ascuheap @acsmem +@c libc_cleanup_region_start @ascuplugin @ascuheap @acsmem +@c pthread_cleanup_push_defer @ascuplugin @ascuheap @acsmem +@c CANCELLATION_P @ascuplugin @ascuheap @acsmem +@c CANCEL_ENABLED_AND_CANCELED ok +@c do_cancel @ascuplugin @ascuheap @acsmem +@c cancel_handler ok +@c kill syscall ok +@c waitpid dup ok +@c libc_lock_lock ok +@c sigaction dup ok +@c libc_lock_unlock ok +@c FORK ok +@c clone syscall ok +@c waitpid dup ok +@c CLEANUP_RESET ok +@c libc_cleanup_region_end ok +@c pthread_cleanup_pop_restore ok +@c SINGLE_THREAD_P ok +@c LIBC_CANCEL_ASYNC @ascuplugin @ascuheap @acsmem +@c libc_enable_asynccancel @ascuplugin @ascuheap @acsmem +@c CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS dup ok +@c do_cancel dup @ascuplugin @ascuheap @acsmem +@c LIBC_CANCEL_RESET ok +@c libc_disable_asynccancel ok +@c lll_futex_wait dup ok +This function executes @var{command} as a shell command. In @theglibc{}, +it always uses the default shell @code{sh} to run the command. +In particular, it searches the directories in @code{PATH} to find +programs to execute. The return value is @code{-1} if it wasn't +possible to create the shell process, and otherwise is the status of the +shell process. 
@xref{Process Completion}, for details on how this +status code can be interpreted. + +If the @var{command} argument is a null pointer, a return value of zero +indicates that no command processor is available. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{system} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this calls to @code{system} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +@pindex stdlib.h +The @code{system} function is declared in the header file +@file{stdlib.h}. +@end deftypefun + +@strong{Portability Note:} Some C implementations may not have any +notion of a command processor that can execute other programs. You can +determine whether a command processor exists by executing +@w{@code{system (NULL)}}; if the return value is nonzero, a command +processor is available. + +The @code{popen} and @code{pclose} functions (@pxref{Pipe to a +Subprocess}) are closely related to the @code{system} function. They +allow the parent process to communicate with the standard input and +output channels of the command being executed. + +@node Process Creation Concepts +@section Process Creation Concepts + +This section gives an overview of processes and of the steps involved in +creating a process and making it run another program. + +@cindex process ID +@cindex process lifetime +Each process is named by a @dfn{process ID} number. A unique process ID +is allocated to each process when it is created. The @dfn{lifetime} of +a process ends when its termination is reported to its parent process; +at that time, all of the process resources, including its process ID, +are freed. 
+ +@cindex creating a process +@cindex forking a process +@cindex child process +@cindex parent process +Processes are created with the @code{fork} system call (so the operation +of creating a new process is sometimes called @dfn{forking} a process). +The @dfn{child process} created by @code{fork} is a copy of the original +@dfn{parent process}, except that it has its own process ID. + +After forking a child process, both the parent and child processes +continue to execute normally. If you want your program to wait for a +child process to finish executing before continuing, you must do this +explicitly after the fork operation, by calling @code{wait} or +@code{waitpid} (@pxref{Process Completion}). These functions give you +limited information about why the child terminated---for example, its +exit status code. + +A newly forked child process continues to execute the same program as +its parent process, at the point where the @code{fork} call returns. +You can use the return value from @code{fork} to tell whether the program +is running in the parent process or the child. + +@cindex process image +Having several processes run the same program is only occasionally +useful. But the child can execute another program using one of the +@code{exec} functions; see @ref{Executing a File}. The program that the +process is executing is called its @dfn{process image}. Starting +execution of a new program causes the process to forget all about its +previous process image; when the new program exits, the process exits +too, instead of returning to the previous process image. + +@node Process Identification +@section Process Identification + +The @code{pid_t} data type represents process IDs. You can get the +process ID of a process by calling @code{getpid}. The function +@code{getppid} returns the process ID of the parent of the current +process (this is also known as the @dfn{parent process ID}). 
Your +program should include the header files @file{unistd.h} and +@file{sys/types.h} to use these functions. +@pindex sys/types.h +@pindex unistd.h + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} pid_t +The @code{pid_t} data type is a signed integer type which is capable +of representing a process ID. In @theglibc{}, this is an @code{int}. +@end deftp + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t getpid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getpid} function returns the process ID of the current process. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t getppid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getppid} function returns the process ID of the parent of the +current process. +@end deftypefun + +@node Creating a Process +@section Creating a Process + +The @code{fork} function is the primitive for creating a process. +It is declared in the header file @file{unistd.h}. +@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun pid_t fork (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuplugin{}}@acunsafe{@aculock{}}} +@c The nptl/.../linux implementation safely collects fork_handlers into +@c an alloca()ed linked list and increments ref counters; it uses atomic +@c ops and retries, avoiding locking altogether. It then takes the +@c IO_list lock, resets the thread-local pid, and runs fork. The parent +@c restores the thread-local pid, releases the lock, and runs parent +@c handlers, decrementing the ref count and signaling futex wait if +@c requested by unregister_atfork. The child bumps the fork generation, +@c sets the thread-local pid, resets cpu clocks, initializes the robust +@c mutex list, the stream locks, the IO_list lock, the dynamic loader +@c lock, runs the child handlers, resetting ref counters to 1, and +@c initializes the fork lock. These are all safe, unless atfork +@c handlers themselves are unsafe.
+The @code{fork} function creates a new process. + +If the operation is successful, there are then both parent and child +processes and both see @code{fork} return, but with different values: it +returns a value of @code{0} in the child process and returns the child's +process ID in the parent process. + +If process creation failed, @code{fork} returns a value of @code{-1} in +the parent process. The following @code{errno} error conditions are +defined for @code{fork}: + +@table @code +@item EAGAIN +There aren't enough system resources to create another process, or the +user already has too many processes running. This means exceeding the +@code{RLIMIT_NPROC} resource limit, which can usually be increased; +@pxref{Limits on Resources}. + +@item ENOMEM +The process requires more space than the system can supply. +@end table +@end deftypefun + +The specific attributes of the child process that differ from the +parent process are: + +@itemize @bullet +@item +The child process has its own unique process ID. + +@item +The parent process ID of the child process is the process ID of its +parent process. + +@item +The child process gets its own copies of the parent process's open file +descriptors. Subsequently changing attributes of the file descriptors +in the parent process won't affect the file descriptors in the child, +and vice versa. @xref{Control Operations}. However, the file position +associated with each descriptor is shared by both processes; +@pxref{File Position}. + +@item +The elapsed processor times for the child process are set to zero; +see @ref{Processor Time}. + +@item +The child doesn't inherit file locks set by the parent process. +@c !!! flock locks shared +@xref{Control Operations}. + +@item +The child doesn't inherit alarms set by the parent process. +@xref{Setting an Alarm}. + +@item +The set of pending signals (@pxref{Delivery of Signal}) for the child +process is cleared. 
(The child process inherits its mask of blocked +signals and signal actions from the parent process.) +@end itemize + + +@comment unistd.h +@comment BSD +@deftypefun pid_t vfork (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuplugin{}}@acunsafe{@aculock{}}} +@c The vfork implementation proper is a safe syscall, but it may fall +@c back to fork if the vfork syscall is not available. +The @code{vfork} function is similar to @code{fork} but on some systems +it is more efficient; however, there are restrictions you must follow to +use it safely. + +While @code{fork} makes a complete copy of the calling process's address +space and allows both the parent and child to execute independently, +@code{vfork} does not make this copy. Instead, the child process +created with @code{vfork} shares its parent's address space until it +calls @code{_exit} or one of the @code{exec} functions. In the +meantime, the parent process suspends execution. + +You must be very careful not to allow the child process created with +@code{vfork} to modify any global data or even local variables shared +with the parent. Furthermore, the child process cannot return from (or +do a long jump out of) the function that called @code{vfork}! This +would leave the parent process's control information very confused. If +in doubt, use @code{fork} instead. + +Some operating systems don't really implement @code{vfork}. @Theglibc{} +permits you to use @code{vfork} on all systems, but actually +executes @code{fork} if @code{vfork} isn't available. If you follow +the proper precautions for using @code{vfork}, your program will still +work even if the system uses @code{fork} instead. +@end deftypefun + +@node Executing a File +@section Executing a File +@cindex executing a file +@cindex @code{exec} functions + +This section describes the @code{exec} family of functions, for executing +a file as a process image. You can use these functions to make a child +process execute a new program after it has been forked. 
+ +To see the effects of @code{exec} from the point of view of the called +program, see @ref{Program Basics}. + +@pindex unistd.h +The functions in this family differ in how you specify the arguments, +but otherwise they all do the same thing. They are declared in the +header file @file{unistd.h}. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int execv (const char *@var{filename}, char *const @var{argv}@t{[]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{execv} function executes the file named by @var{filename} as a +new process image. + +The @var{argv} argument is an array of null-terminated strings that is +used to provide a value for the @code{argv} argument to the @code{main} +function of the program to be executed. The last element of this array +must be a null pointer. By convention, the first element of this array +is the file name of the program sans directory names. @xref{Program +Arguments}, for full details on how programs can access these arguments. + +The environment for the new process image is taken from the +@code{environ} variable of the current process image; see +@ref{Environment Variables}, for information about environments. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int execl (const char *@var{filename}, const char *@var{arg0}, @dots{}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is similar to @code{execv}, but the @var{argv} strings are +specified individually instead of as an array. A null pointer must be +passed as the last such argument. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int execve (const char *@var{filename}, char *const @var{argv}@t{[]}, char *const @var{env}@t{[]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is similar to @code{execv}, but permits you to specify the environment +for the new program explicitly as the @var{env} argument. 
This should +be an array of strings in the same format as for the @code{environ} +variable; see @ref{Environment Access}. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int execle (const char *@var{filename}, const char *@var{arg0}, @dots{}, char *const @var{env}@t{[]}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is similar to @code{execl}, but permits you to specify the +environment for the new program explicitly. The environment argument is +passed following the null pointer that marks the last @var{argv} +argument, and should be an array of strings in the same format as for +the @code{environ} variable. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int execvp (const char *@var{filename}, char *const @var{argv}@t{[]}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{execvp} function is similar to @code{execv}, except that it +searches the directories listed in the @code{PATH} environment variable +(@pxref{Standard Environment}) to find the full file name of a +file from @var{filename} if @var{filename} does not contain a slash. + +This function is useful for executing system utility programs, because +it looks for them in the places that the user has chosen. Shells use it +to run the commands that users type. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int execlp (const char *@var{filename}, const char *@var{arg0}, @dots{}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function is like @code{execl}, except that it performs the same +file name searching as the @code{execvp} function. +@end deftypefun + +The size of the argument list and environment list taken together must +not be greater than @code{ARG_MAX} bytes. @xref{General Limits}. 
On +@gnuhurdsystems{}, the size (which compares against @code{ARG_MAX}) +includes, for each string, the number of characters in the string, plus +the size of a @code{char *}, plus one, rounded up to a multiple of the +size of a @code{char *}. Other systems may have somewhat different +rules for counting. + +These functions normally don't return, since execution of a new program +causes the currently executing program to go away completely. A value +of @code{-1} is returned in the event of a failure. In addition to the +usual file name errors (@pxref{File Name Errors}), the following +@code{errno} error conditions are defined for these functions: + +@table @code +@item E2BIG +The combined size of the new program's argument list and environment +list is larger than @code{ARG_MAX} bytes. @gnuhurdsystems{} have no +specific limit on the argument list size, so this error code cannot +result, but you may get @code{ENOMEM} instead if the arguments are too +big for available memory. + +@item ENOEXEC +The specified file can't be executed because it isn't in the right format. + +@item ENOMEM +Executing the specified file requires more storage than is available. +@end table + +If execution of the new file succeeds, it updates the access time field +of the file as if the file had been read. @xref{File Times}, for more +details about access times of files. + +The point at which the file is closed again is not specified, but +is at some point before the process exits or before another process +image is executed. + +Executing a new process image completely changes the contents of memory, +copying only the argument and environment strings to new locations. But +many other attributes of the process are unchanged: + +@itemize @bullet +@item +The process ID and the parent process ID. @xref{Process Creation Concepts}. + +@item +Session and process group membership. @xref{Concepts of Job Control}. + +@item +Real user ID and group ID, and supplementary group IDs. 
@xref{Process +Persona}. + +@item +Pending alarms. @xref{Setting an Alarm}. + +@item +Current working directory and root directory. @xref{Working +Directory}. On @gnuhurdsystems{}, the root directory is not copied when +executing a setuid program; instead the system default root directory +is used for the new program. + +@item +File mode creation mask. @xref{Setting Permissions}. + +@item +Process signal mask; see @ref{Process Signal Mask}. + +@item +Pending signals; see @ref{Blocking Signals}. + +@item +Elapsed processor time associated with the process; see @ref{Processor Time}. +@end itemize + +If the set-user-ID and set-group-ID mode bits of the process image file +are set, this affects the effective user ID and effective group ID +(respectively) of the process. These concepts are discussed in detail +in @ref{Process Persona}. + +Signals that are set to be ignored in the existing process image are +also set to be ignored in the new process image. All other signals are +set to the default action in the new process image. For more +information about signals, see @ref{Signal Handling}. + +File descriptors open in the existing process image remain open in the +new process image, unless they have the @code{FD_CLOEXEC} +(close-on-exec) flag set. The files that remain open inherit all +attributes of the open file descriptors from the existing process image, +including file locks. File descriptors are discussed in @ref{Low-Level I/O}. + +Streams, by contrast, cannot survive through @code{exec} functions, +because they are located in the memory of the process itself. The new +process image has no streams except those it creates afresh. Each of +the streams in the pre-@code{exec} process image has a descriptor inside +it, and these descriptors do survive through @code{exec} (provided that +they do not have @code{FD_CLOEXEC} set). The new process image can +reconnect these to new streams using @code{fdopen} (@pxref{Descriptors +and Streams}). 
+ +@node Process Completion +@section Process Completion +@cindex process completion +@cindex waiting for completion of child process +@cindex testing exit status of child process + +The functions described in this section are used to wait for a child +process to terminate or stop, and determine its status. These functions +are declared in the header file @file{sys/wait.h}. +@pindex sys/wait.h + +@comment sys/wait.h +@comment POSIX.1 +@deftypefun pid_t waitpid (pid_t @var{pid}, int *@var{status-ptr}, int @var{options}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{waitpid} function is used to request status information from a +child process whose process ID is @var{pid}. Normally, the calling +process is suspended until the child process makes status information +available by terminating. + +Other values for the @var{pid} argument have special interpretations. A +value of @code{-1} or @code{WAIT_ANY} requests status information for +any child process; a value of @code{0} or @code{WAIT_MYPGRP} requests +information for any child process in the same process group as the +calling process; and any other negative value @minus{} @var{pgid} +requests information for any child process whose process group ID is +@var{pgid}. + +If status information for a child process is available immediately, this +function returns immediately without waiting. If more than one eligible +child process has status information available, one of them is chosen +randomly, and its status is returned immediately. To get the status +from the other eligible child processes, you need to call @code{waitpid} +again. + +The @var{options} argument is a bit mask. Its value should be the +bitwise OR (that is, the @samp{|} operator) of zero or more of the +@code{WNOHANG} and @code{WUNTRACED} flags. 
You can use the +@code{WNOHANG} flag to indicate that the parent process shouldn't wait; +and the @code{WUNTRACED} flag to request status information from stopped +processes as well as processes that have terminated. + +The status information from the child process is stored in the object +that @var{status-ptr} points to, unless @var{status-ptr} is a null pointer. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{waitpid} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this calls to @code{waitpid} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The return value is normally the process ID of the child process whose +status is reported. If there are child processes but none of them is +waiting to be noticed, @code{waitpid} will block until one is. However, +if the @code{WNOHANG} option was specified, @code{waitpid} will return +zero instead of blocking. + +If a specific PID to wait for was given to @code{waitpid}, it will +ignore all other children (if any). Therefore if there are children +waiting to be noticed but the child whose PID was specified is not one +of them, @code{waitpid} will block or return zero as described above. + +A value of @code{-1} is returned in case of error. The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EINTR +The function was interrupted by delivery of a signal to the calling +process. @xref{Interrupted Primitives}. + +@item ECHILD +There are no child processes to wait for, or the specified @var{pid} +is not a child of the calling process. + +@item EINVAL +An invalid value was provided for the @var{options} argument. 
+@end table +@end deftypefun + +These symbolic constants are defined as values for the @var{pid} argument +to the @code{waitpid} function. + +@comment Extra blank lines make it look better. +@vtable @code +@item WAIT_ANY + +This constant macro (whose value is @code{-1}) specifies that +@code{waitpid} should return status information about any child process. + + +@item WAIT_MYPGRP +This constant (with value @code{0}) specifies that @code{waitpid} should +return status information about any child process in the same process +group as the calling process. +@end vtable + +These symbolic constants are defined as flags for the @var{options} +argument to the @code{waitpid} function. You can bitwise-OR the flags +together to obtain a value to use as the argument. + +@vtable @code +@item WNOHANG + +This flag specifies that @code{waitpid} should return immediately +instead of waiting, if there is no child process ready to be noticed. + +@item WUNTRACED + +This flag specifies that @code{waitpid} should report the status of any +child processes that have been stopped as well as those that have +terminated. +@end vtable + +@comment sys/wait.h +@comment POSIX.1 +@deftypefun pid_t wait (int *@var{status-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is a simplified version of @code{waitpid}, and is used to wait +until any one child process terminates. The call: + +@smallexample +wait (&status) +@end smallexample + +@noindent +is exactly equivalent to: + +@smallexample +waitpid (-1, &status, 0) +@end smallexample + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{wait} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this calls to @code{wait} should be +protected using cancellation handlers. 
+@c ref pthread_cleanup_push / pthread_cleanup_pop +@end deftypefun + +@comment sys/wait.h +@comment BSD +@deftypefun pid_t wait4 (pid_t @var{pid}, int *@var{status-ptr}, int @var{options}, struct rusage *@var{usage}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If @var{usage} is a null pointer, @code{wait4} is equivalent to +@code{waitpid (@var{pid}, @var{status-ptr}, @var{options})}. + +If @var{usage} is not null, @code{wait4} stores usage figures for the +child process in @code{*@var{usage}} (but only if the child has +terminated, not if it has stopped). @xref{Resource Usage}. + +This function is a BSD extension. +@end deftypefun + +Here's an example of how to use @code{waitpid} to get the status from +all child processes that have terminated, without ever waiting. This +function is designed to be a handler for @code{SIGCHLD}, the signal that +indicates that at least one child process has terminated. + +@smallexample +@group +void +sigchld_handler (int signum) +@{ + int pid, status, serrno; + serrno = errno; + while (1) + @{ + pid = waitpid (WAIT_ANY, &status, WNOHANG); + if (pid < 0) + @{ + perror ("waitpid"); + break; + @} + if (pid == 0) + break; + notice_termination (pid, status); + @} + errno = serrno; +@} +@end group +@end smallexample + +@node Process Completion Status +@section Process Completion Status + +If the exit status value (@pxref{Program Termination}) of the child +process is zero, then the status value reported by @code{waitpid} or +@code{wait} is also zero. You can test for other kinds of information +encoded in the returned status value using the following macros. +These macros are defined in the header file @file{sys/wait.h}. +@pindex sys/wait.h + +@comment sys/wait.h +@comment POSIX.1 +@deftypefn Macro int WIFEXITED (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if the child process terminated +normally with @code{exit} or @code{_exit}.
+@end deftypefn + +@comment sys/wait.h +@comment POSIX.1 +@deftypefn Macro int WEXITSTATUS (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If @code{WIFEXITED} is true of @var{status}, this macro returns the +low-order 8 bits of the exit status value from the child process. +@xref{Exit Status}. +@end deftypefn + +@comment sys/wait.h +@comment POSIX.1 +@deftypefn Macro int WIFSIGNALED (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if the child process terminated +because it received a signal that was not handled. +@xref{Signal Handling}. +@end deftypefn + +@comment sys/wait.h +@comment POSIX.1 +@deftypefn Macro int WTERMSIG (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If @code{WIFSIGNALED} is true of @var{status}, this macro returns the +signal number of the signal that terminated the child process. +@end deftypefn + +@comment sys/wait.h +@comment BSD +@deftypefn Macro int WCOREDUMP (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if the child process terminated +and produced a core dump. +@end deftypefn + +@comment sys/wait.h +@comment POSIX.1 +@deftypefn Macro int WIFSTOPPED (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro returns a nonzero value if the child process is stopped. +@end deftypefn + +@comment sys/wait.h +@comment POSIX.1 +@deftypefn Macro int WSTOPSIG (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If @code{WIFSTOPPED} is true of @var{status}, this macro returns the +signal number of the signal that caused the child process to stop. +@end deftypefn + + +@node BSD Wait Functions +@section BSD Process Wait Function + +@Theglibc{} also provides the @code{wait3} function for compatibility +with BSD. This function is declared in @file{sys/wait.h}. It is the +predecessor to @code{wait4}, which is more flexible. @code{wait3} is +now obsolete. 
+@pindex sys/wait.h + +@comment sys/wait.h +@comment BSD +@deftypefun pid_t wait3 (int *@var{status-ptr}, int @var{options}, struct rusage *@var{usage}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If @var{usage} is a null pointer, @code{wait3} is equivalent to +@code{waitpid (-1, @var{status-ptr}, @var{options})}. + +If @var{usage} is not null, @code{wait3} stores usage figures for the +child process in @code{*@var{usage}} (but only if the child has +terminated, not if it has stopped). @xref{Resource Usage}. +@end deftypefun + +@node Process Creation Example +@section Process Creation Example + +Here is an example program showing how you might write a function +similar to the built-in @code{system}. It executes its @var{command} +argument using the equivalent of @samp{sh -c @var{command}}. + +@smallexample +#include <stddef.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> + +/* @r{Execute the command using this shell program.} */ +#define SHELL "/bin/sh" + +@group +int +my_system (const char *command) +@{ + int status; + pid_t pid; +@end group + + pid = fork (); + if (pid == 0) + @{ + /* @r{This is the child process. Execute the shell command.} */ + execl (SHELL, SHELL, "-c", command, NULL); + _exit (EXIT_FAILURE); + @} + else if (pid < 0) + /* @r{The fork failed. Report failure.} */ + status = -1; + else + /* @r{This is the parent process. Wait for the child to complete.} */ + if (waitpid (pid, &status, 0) != pid) + status = -1; + return status; +@} +@end smallexample + +@comment Yes, this example has been tested. + +There are a couple of things you should pay attention to in this +example. + +Remember that the first @code{argv} argument supplied to the program +represents the name of the program being executed. That is why, in the +call to @code{execl}, @code{SHELL} is supplied once to name the program +to execute and a second time to supply a value for @code{argv[0]}.
+ +The @code{execl} call in the child process doesn't return if it is +successful. If it fails, you must do something to make the child +process terminate. Just returning a bad status code with @code{return} +would leave two processes running the original program. Instead, the +right behavior is for the child process to report failure to its parent +process. + +Call @code{_exit} to accomplish this. The reason for using @code{_exit} +instead of @code{exit} is to avoid flushing fully buffered streams such +as @code{stdout}. The buffers of these streams probably contain data +that was copied from the parent process by the @code{fork}, data that +will be output eventually by the parent process. Calling @code{exit} in +the child would output the data twice. @xref{Termination Internals}. diff --git a/REORG.TODO/manual/resource.texi b/REORG.TODO/manual/resource.texi new file mode 100644 index 0000000000..40160384fc --- /dev/null +++ b/REORG.TODO/manual/resource.texi @@ -0,0 +1,1722 @@ +@node Resource Usage And Limitation, Non-Local Exits, Date and Time, Top +@c %MENU% Functions for examining resource usage and getting and setting limits +@chapter Resource Usage And Limitation +This chapter describes functions for examining how much of various kinds of +resources (CPU time, memory, etc.) a process has used and getting and setting +limits on future usage. + +@menu +* Resource Usage:: Measuring various resources used. +* Limits on Resources:: Specifying limits on resource usage. +* Priority:: Reading or setting process run priority. +* Memory Resources:: Querying memory available resources. +* Processor Resources:: Learn about the processors available. +@end menu + + +@node Resource Usage +@section Resource Usage + +@pindex sys/resource.h +The function @code{getrusage} and the data type @code{struct rusage} +are used to examine the resource usage of a process. They are declared +in @file{sys/resource.h}. 
+ +@comment sys/resource.h +@comment BSD +@deftypefun int getrusage (int @var{processes}, struct rusage *@var{rusage}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On HURD, this calls task_info 3 times. On UNIX, it's a syscall. +This function reports resource usage totals for processes specified by +@var{processes}, storing the information in @code{*@var{rusage}}. + +In most systems, @var{processes} has only two valid values: + +@vtable @code +@comment sys/resource.h +@comment BSD +@item RUSAGE_SELF +Just the current process. + +@comment sys/resource.h +@comment BSD +@item RUSAGE_CHILDREN +All child processes (direct and indirect) that have already terminated. +@end vtable + +The return value of @code{getrusage} is zero for success, and @code{-1} +for failure. + +@table @code +@item EINVAL +The argument @var{processes} is not valid. +@end table +@end deftypefun + +One way of getting resource usage for a particular child process is with +the function @code{wait4}, which returns totals for a child when it +terminates. @xref{BSD Wait Functions}. + +@comment sys/resource.h +@comment BSD +@deftp {Data Type} {struct rusage} +This data type stores various resource usage statistics. It has the +following members, and possibly others: + +@table @code +@item struct timeval ru_utime +Time spent executing user instructions. + +@item struct timeval ru_stime +Time spent in operating system code on behalf of @var{processes}. + +@item long int ru_maxrss +The maximum resident set size used, in kilobytes. That is, the maximum +number of kilobytes of physical memory that @var{processes} used +simultaneously. + +@item long int ru_ixrss +An integral value expressed in kilobytes times ticks of execution, which +indicates the amount of memory used by text that was shared with other +processes. + +@item long int ru_idrss +An integral value expressed the same way, which is the amount of +unshared memory used for data. 
+ +@item long int ru_isrss +An integral value expressed the same way, which is the amount of +unshared memory used for stack space. + +@item long int ru_minflt +The number of page faults which were serviced without requiring any I/O. + +@item long int ru_majflt +The number of page faults which were serviced by doing I/O. + +@item long int ru_nswap +The number of times @var{processes} was swapped entirely out of main memory. + +@item long int ru_inblock +The number of times the file system had to read from the disk on behalf +of @var{processes}. + +@item long int ru_oublock +The number of times the file system had to write to the disk on behalf +of @var{processes}. + +@item long int ru_msgsnd +Number of IPC messages sent. + +@item long int ru_msgrcv +Number of IPC messages received. + +@item long int ru_nsignals +Number of signals received. + +@item long int ru_nvcsw +The number of times @var{processes} voluntarily invoked a context switch +(usually to wait for some service). + +@item long int ru_nivcsw +The number of times an involuntary context switch took place (because +a time slice expired, or another process of higher priority was +scheduled). +@end table +@end deftp + +@code{vtimes} is a historical function that does some of what +@code{getrusage} does. @code{getrusage} is a better choice. + +@code{vtimes} and its @code{vtimes} data structure are declared in +@file{sys/vtimes.h}. +@pindex sys/vtimes.h + +@comment sys/vtimes.h +@deftypefun int vtimes (struct vtimes *@var{current}, struct vtimes *@var{child}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Calls getrusage twice. + +@code{vtimes} reports resource usage totals for a process. + +If @var{current} is non-null, @code{vtimes} stores resource usage totals for +the invoking process alone in the structure to which it points. 
If +@var{child} is non-null, @code{vtimes} stores resource usage totals for all +past children (which have terminated) of the invoking process in the structure +to which it points. + +@deftp {Data Type} {struct vtimes} +This data type contains information about the resource usage of a process. +Each member corresponds to a member of the @code{struct rusage} data type +described above. + +@table @code +@item vm_utime +User CPU time. Analogous to @code{ru_utime} in @code{struct rusage} +@item vm_stime +System CPU time. Analogous to @code{ru_stime} in @code{struct rusage} +@item vm_idsrss +Data and stack memory. The sum of the values that would be reported as +@code{ru_idrss} and @code{ru_isrss} in @code{struct rusage} +@item vm_ixrss +Shared memory. Analogous to @code{ru_ixrss} in @code{struct rusage} +@item vm_maxrss +Maximum resident set size. Analogous to @code{ru_maxrss} in +@code{struct rusage} +@item vm_majflt +Major page faults. Analogous to @code{ru_majflt} in @code{struct rusage} +@item vm_minflt +Minor page faults. Analogous to @code{ru_minflt} in @code{struct rusage} +@item vm_nswap +Swap count. Analogous to @code{ru_nswap} in @code{struct rusage} +@item vm_inblk +Disk reads. Analogous to @code{ru_inblock} in @code{struct rusage} +@item vm_oublk +Disk writes. Analogous to @code{ru_oublock} in @code{struct rusage} +@end table +@end deftp + + +The return value is zero if the function succeeds; @code{-1} otherwise. + + + +@end deftypefun +An additional historical function for examining resource usage, +@code{vtimes}, is supported but not documented here. It is declared in +@file{sys/vtimes.h}. + +@node Limits on Resources +@section Limiting Resource Usage +@cindex resource limits +@cindex limits on resource usage +@cindex usage limits + +You can specify limits for the resource usage of a process. When the +process tries to exceed a limit, it may get a signal, or the system call +by which it tried to do so may fail, depending on the resource.
Each +process initially inherits its limit values from its parent, but it can +subsequently change them. + +There are two per-process limits associated with a resource: +@cindex limit + +@table @dfn +@item current limit +The current limit is the value the system will not allow usage to +exceed. It is also called the ``soft limit'' because the process being +limited can generally raise the current limit at will. +@cindex current limit +@cindex soft limit + +@item maximum limit +The maximum limit is the maximum value to which a process is allowed to +set its current limit. It is also called the ``hard limit'' because +there is no way for a process to get around it. A process may lower +its own maximum limit, but only the superuser may increase a maximum +limit. +@cindex maximum limit +@cindex hard limit +@end table + +@pindex sys/resource.h +The symbols for use with @code{getrlimit}, @code{setrlimit}, +@code{getrlimit64}, and @code{setrlimit64} are defined in +@file{sys/resource.h}. + +@comment sys/resource.h +@comment BSD +@deftypefun int getrlimit (int @var{resource}, struct rlimit *@var{rlp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on most systems. +Read the current and maximum limits for the resource @var{resource} +and store them in @code{*@var{rlp}}. + +The return value is @code{0} on success and @code{-1} on failure. The +only possible @code{errno} error condition is @code{EFAULT}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is in fact @code{getrlimit64}. Thus, the +LFS interface transparently replaces the old interface. +@end deftypefun + +@comment sys/resource.h +@comment Unix98 +@deftypefun int getrlimit64 (int @var{resource}, struct rlimit64 *@var{rlp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on most systems, wrapper to getrlimit otherwise. 
+This function is similar to @code{getrlimit} but its second parameter is +a pointer to a variable of type @code{struct rlimit64}, which allows it +to read values which wouldn't fit in the member of a @code{struct +rlimit}. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit machine, this function is available under the name +@code{getrlimit} and so transparently replaces the old interface. +@end deftypefun + +@comment sys/resource.h +@comment BSD +@deftypefun int setrlimit (int @var{resource}, const struct rlimit *@var{rlp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on most systems; lock-taking critical section on HURD. +Change the current and maximum limits of the process for the resource +@var{resource} to the values provided in @code{*@var{rlp}}. + +The return value is @code{0} on success and @code{-1} on failure. The +following @code{errno} error condition is possible: + +@table @code +@item EPERM +@itemize @bullet +@item +The process tried to raise a current limit beyond the maximum limit. + +@item +The process tried to raise a maximum limit, but is not superuser. +@end itemize +@end table + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit system this function is in fact @code{setrlimit64}. Thus, the +LFS interface transparently replaces the old interface. +@end deftypefun + +@comment sys/resource.h +@comment Unix98 +@deftypefun int setrlimit64 (int @var{resource}, const struct rlimit64 *@var{rlp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Wrapper for setrlimit or direct syscall. +This function is similar to @code{setrlimit} but its second parameter is +a pointer to a variable of type @code{struct rlimit64} which allows it +to set values which wouldn't fit in the member of a @code{struct +rlimit}. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32-bit machine this function is available under the name +@code{setrlimit} and so transparently replaces the old interface.
+@end deftypefun + +@comment sys/resource.h +@comment BSD +@deftp {Data Type} {struct rlimit} +This structure is used with @code{getrlimit} to receive limit values, +and with @code{setrlimit} to specify limit values for a particular process +and resource. It has two fields: + +@table @code +@item rlim_t rlim_cur +The current limit + +@item rlim_t rlim_max +The maximum limit. +@end table + +For @code{getrlimit}, the structure is an output; it receives the current +values. For @code{setrlimit}, it specifies the new values. +@end deftp + +For the LFS functions a similar type is defined in @file{sys/resource.h}. + +@comment sys/resource.h +@comment Unix98 +@deftp {Data Type} {struct rlimit64} +This structure is analogous to the @code{rlimit} structure above, but +its components have wider ranges. It has two fields: + +@table @code +@item rlim64_t rlim_cur +This is analogous to @code{rlimit.rlim_cur}, but with a different type. + +@item rlim64_t rlim_max +This is analogous to @code{rlimit.rlim_max}, but with a different type. +@end table + +@end deftp + +Here is a list of resources for which you can specify a limit. Memory +and file sizes are measured in bytes. + +@vtable @code +@comment sys/resource.h +@comment BSD +@item RLIMIT_CPU +The maximum amount of CPU time the process can use. If it runs for +longer than this, it gets a signal: @code{SIGXCPU}. The value is +measured in seconds. @xref{Operation Error Signals}. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_FSIZE +The maximum size of file the process can create. Trying to write a +larger file causes a signal: @code{SIGXFSZ}. @xref{Operation Error +Signals}. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_DATA +The maximum size of data memory for the process. If the process tries +to allocate data memory beyond this amount, the allocation function +fails. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_STACK +The maximum stack size for the process. 
If the process tries to extend +its stack past this size, it gets a @code{SIGSEGV} signal. +@xref{Program Error Signals}. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_CORE +The maximum size core file that this process can create. If the process +terminates and would dump a core file larger than this, then no core +file is created. So setting this limit to zero prevents core files from +ever being created. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_RSS +The maximum amount of physical memory that this process should get. +This parameter is a guide for the system's scheduler and memory +allocator; the system may give the process more memory when there is a +surplus. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_MEMLOCK +The maximum amount of memory that can be locked into physical memory (so +it will never be paged out). + +@comment sys/resource.h +@comment BSD +@item RLIMIT_NPROC +The maximum number of processes that can be created with the same user ID. +If you have reached the limit for your user ID, @code{fork} will fail +with @code{EAGAIN}. @xref{Creating a Process}. + +@comment sys/resource.h +@comment BSD +@item RLIMIT_NOFILE +@itemx RLIMIT_OFILE +The maximum number of files that the process can open. If it tries to +open more files than this, its open attempt fails with @code{errno} +@code{EMFILE}. @xref{Error Codes}. Not all systems support this limit; +GNU does, and 4.4 BSD does. + +@comment sys/resource.h +@comment Unix98 +@item RLIMIT_AS +The maximum size of total memory that this process should get. If the +process tries to allocate more memory beyond this amount with, for +example, @code{brk}, @code{malloc}, @code{mmap} or @code{sbrk}, the +allocation function fails. + +@comment sys/resource.h +@comment BSD +@item RLIM_NLIMITS +The number of different resource limits. Any valid @var{resource} +operand must be less than @code{RLIM_NLIMITS}. 
+@end vtable + +@comment sys/resource.h +@comment BSD +@deftypevr Constant rlim_t RLIM_INFINITY +This constant stands for a value of ``infinity'' when supplied as +the limit value in @code{setrlimit}. +@end deftypevr + + +The following are historical functions to do some of what the functions +above do. The functions above are better choices. + +@code{ulimit} and the command symbols are declared in @file{ulimit.h}. +@pindex ulimit.h + +@comment ulimit.h +@comment BSD +@deftypefun {long int} ulimit (int @var{cmd}, @dots{}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Wrapper for getrlimit, setrlimit or +@c sysconf(_SC_OPEN_MAX)->getdtablesize->getrlimit. + +@code{ulimit} gets the current limit or sets the current and maximum +limit for a particular resource for the calling process according to the +command @var{cmd}. + +If you are getting a limit, the command argument is the only argument. +If you are setting a limit, there is a second argument: +@code{long int} @var{limit} which is the value to which you are setting +the limit. + +The @var{cmd} values and the operations they specify are: +@vtable @code + +@item GETFSIZE +Get the current limit on the size of a file, in units of 512 bytes. + +@item SETFSIZE +Set the current and maximum limit on the size of a file to @var{limit} * +512 bytes. + +@end vtable + +There are also some other @var{cmd} values that may do things on some +systems, but they are not supported. + +Only the superuser may increase a maximum limit. + +When you successfully get a limit, the return value of @code{ulimit} is +that limit, which is never negative. When you successfully set a limit, +the return value is zero. When the function fails, the return value is +@code{-1} and @code{errno} is set according to the reason: + +@table @code +@item EPERM +A process tried to increase a maximum limit, but is not superuser. +@end table + + +@end deftypefun + +@code{vlimit} and its resource symbols are declared in @file{sys/vlimit.h}. 
+@pindex sys/vlimit.h + +@comment sys/vlimit.h +@comment BSD +@deftypefun int vlimit (int @var{resource}, int @var{limit}) +@safety{@prelim{}@mtunsafe{@mtasurace{:setrlimit}}@asunsafe{}@acsafe{}} +@c It calls getrlimit and modifies the rlim_cur field before calling +@c setrlimit. There's a window for a concurrent call to setrlimit that +@c modifies e.g. rlim_max, which will be lost if running as super-user. + +@code{vlimit} sets the current limit for a resource for a process. + +@var{resource} identifies the resource: + +@vtable @code +@item LIM_CPU +Maximum CPU time. Same as @code{RLIMIT_CPU} for @code{setrlimit}. +@item LIM_FSIZE +Maximum file size. Same as @code{RLIMIT_FSIZE} for @code{setrlimit}. +@item LIM_DATA +Maximum data memory. Same as @code{RLIMIT_DATA} for @code{setrlimit}. +@item LIM_STACK +Maximum stack size. Same as @code{RLIMIT_STACK} for @code{setrlimit}. +@item LIM_CORE +Maximum core file size. Same as @code{RLIMIT_CORE} for @code{setrlimit}. +@item LIM_MAXRSS +Maximum physical memory. Same as @code{RLIMIT_RSS} for @code{setrlimit}. +@end vtable + +The return value is zero for success, and @code{-1} with @code{errno} set +accordingly for failure: + +@table @code +@item EPERM +The process tried to set its current limit beyond its maximum limit. +@end table + +@end deftypefun + +@node Priority +@section Process CPU Priority And Scheduling +@cindex process priority +@cindex cpu priority +@cindex priority of a process + +When multiple processes simultaneously require CPU time, the system's +scheduling policy and process CPU priorities determine which processes +get it. This section describes how that determination is made and +@glibcadj{} functions to control it. + +It is common to refer to CPU scheduling simply as scheduling and a +process' CPU priority simply as the process' priority, with the CPU +resource being implied. Bear in mind, though, that CPU time is not the +only resource a process uses or that processes contend for.
In some +cases, it is not even particularly important. Giving a process a high +``priority'' may have very little effect on how fast a process runs with +respect to other processes. The priorities discussed in this section +apply only to CPU time. + +CPU scheduling is a complex issue and different systems do it in wildly +different ways. New ideas continually develop and find their way into +the intricacies of the various systems' scheduling algorithms. This +section discusses the general concepts, some specifics of systems +that commonly use @theglibc{}, and some standards. + +For simplicity, we talk about CPU contention as if there is only one CPU +in the system. But all the same principles apply when a processor has +multiple CPUs, and knowing that the number of processes that can run at +any one time is equal to the number of CPUs, you can easily extrapolate +the information. + +The functions described in this section are all defined by the POSIX.1 +and POSIX.1b standards (the @code{sched@dots{}} functions are POSIX.1b). +However, POSIX does not define any semantics for the values that these +functions get and set. In this chapter, the semantics are based on the +Linux kernel's implementation of the POSIX standard. As you will see, +the Linux implementation is quite the inverse of what the authors of the +POSIX syntax had in mind. + +@menu +* Absolute Priority:: The first tier of priority. Posix +* Realtime Scheduling:: Scheduling among the process nobility +* Basic Scheduling Functions:: Get/set scheduling policy, priority +* Traditional Scheduling:: Scheduling among the vulgar masses +* CPU Affinity:: Limiting execution to certain CPUs +@end menu + + + +@node Absolute Priority +@subsection Absolute Priority +@cindex absolute priority +@cindex priority, absolute + +Every process has an absolute priority, and it is represented by a number. +The higher the number, the higher the absolute priority. 
+ +@cindex realtime CPU scheduling +On systems of the past, and most systems today, all processes have +absolute priority 0 and this section is irrelevant. In that case, +see @ref{Traditional Scheduling}. Absolute priorities were invented to +accommodate realtime systems, in which it is vital that certain processes +be able to respond to external events happening in real time, which +means they cannot wait around while some other process that @emph{wants +to}, but doesn't @emph{need to} run occupies the CPU. + +@cindex ready to run +@cindex preemptive scheduling +When two processes are in contention to use the CPU at any instant, the +one with the higher absolute priority always gets it. This is true even if the +process with the lower priority is already using the CPU (i.e., the +scheduling is preemptive). Of course, we're only talking about +processes that are running or ``ready to run,'' which means they are +ready to execute instructions right now. When a process blocks to wait +for something like I/O, its absolute priority is irrelevant. + +@cindex runnable process +@strong{NB:} The term ``runnable'' is a synonym for ``ready to run.'' + +When two processes are running or ready to run and both have the same +absolute priority, it's more interesting. In that case, who gets the +CPU is determined by the scheduling policy. If the processes have +absolute priority 0, the traditional scheduling policy described in +@ref{Traditional Scheduling} applies. Otherwise, the policies described +in @ref{Realtime Scheduling} apply. + +You normally give an absolute priority above 0 only to a process that +can be trusted not to hog the CPU. Such processes are designed to block +(or terminate) after relatively short CPU runs. + +A process begins life with the same absolute priority as its parent +process. Functions described in @ref{Basic Scheduling Functions} can +change it. + +Only a privileged process can change a process' absolute priority to +something other than @code{0}.
Only a privileged process or the +target process' owner can change its absolute priority at all. + +POSIX requires absolute priority values used with the realtime +scheduling policies to be consecutive with a range of at least 32. On +Linux, they are 1 through 99. The functions +@code{sched_get_priority_max} and @code{sched_get_priority_min} portably +tell you what the range is on a particular system. + + +@subsubsection Using Absolute Priority + +One thing you must keep in mind when designing real time applications is +that having higher absolute priority than any other process doesn't +guarantee the process can run continuously. Two things that can wreck a +good CPU run are interrupts and page faults. + +Interrupt handlers live in that limbo between processes. The CPU is +executing instructions, but they aren't part of any process. An +interrupt will stop even the highest priority process. So you must +allow for slight delays and make sure that no device in the system has +an interrupt handler that could cause too long a delay between +instructions for your process. + +Similarly, a page fault causes what looks like a straightforward +sequence of instructions to take a long time. The fact that other +processes get to run while the page faults in is of no consequence, +because as soon as the I/O is complete, the higher priority process will +kick them out and run again, but the wait for the I/O itself could be a +problem. To neutralize this threat, use @code{mlock} or +@code{mlockall}. + +There are a few ramifications of the absoluteness of this priority on a +single-CPU system that you need to keep in mind when you choose to set a +priority and also when you're working on a program that runs with high +absolute priority. Consider a process that has higher absolute priority +than any other process in the system and due to a bug in its program, it +gets into an infinite loop. It will never cede the CPU.
You can't run +a command to kill it because your command would need to get the CPU in +order to run. The errant program is in complete control. It controls +the vertical, it controls the horizontal. + +There are two ways to avoid this: 1) keep a shell running somewhere with +a higher absolute priority or 2) keep a controlling terminal attached to +the high priority process group. All the priority in the world won't +stop an interrupt handler from running and delivering a signal to the +process if you hit Control-C. + +Some systems use absolute priority as a means of allocating a fixed +percentage of CPU time to a process. To do this, a super high priority +privileged process constantly monitors the process' CPU usage and raises +its absolute priority when the process isn't getting its entitled share +and lowers it when the process is exceeding it. + +@strong{NB:} The absolute priority is sometimes called the ``static +priority.'' We don't use that term in this manual because it misses the +most important feature of the absolute priority: its absoluteness. + + +@node Realtime Scheduling +@subsection Realtime Scheduling +@cindex realtime scheduling + +Whenever two processes with the same absolute priority are ready to run, +the kernel has a decision to make, because only one can run at a time. +If the processes have absolute priority 0, the kernel makes this decision +as described in @ref{Traditional Scheduling}. Otherwise, the decision +is as described in this section. + +If two processes are ready to run but have different absolute priorities, +the decision is much simpler, and is described in @ref{Absolute +Priority}. + +Each process has a scheduling policy. For processes with absolute +priority other than zero, there are two available: + +@enumerate +@item +First Come First Served +@item +Round Robin +@end enumerate + +The most sensible case is where all the processes with a certain +absolute priority have the same scheduling policy. We'll discuss that +first. 
+ +In Round Robin, processes share the CPU, each one running for a small +quantum of time (``time slice'') and then yielding to another in a +circular fashion. Of course, only processes that are ready to run and +have the same absolute priority are in this circle. + +In First Come First Served, the process that has been waiting the +longest to run gets the CPU, and it keeps it until it voluntarily +relinquishes the CPU, runs out of things to do (blocks), or gets +preempted by a higher priority process. + +First Come First Served, along with maximal absolute priority and +careful control of interrupts and page faults, is the one to use when a +process absolutely, positively has to run at full CPU speed or not at +all. + +Judicious use of @code{sched_yield} function invocations by processes +with First Come First Served scheduling policy forms a good compromise +between Round Robin and First Come First Served. + +To understand how scheduling works when processes of different scheduling +policies occupy the same absolute priority, you have to know the nitty +gritty details of how processes enter and exit the ready to run list. + +In both cases, the ready to run list is organized as a true queue, where +a process gets pushed onto the tail when it becomes ready to run and is +popped off the head when the scheduler decides to run it. Note that +ready to run and running are two mutually exclusive states. When the +scheduler runs a process, that process is no longer ready to run and no +longer in the ready to run list. When the process stops running, it +may go back to being ready to run again. + +The only difference between a process that is assigned the Round Robin +scheduling policy and a process that is assigned First Come First Serve +is that in the former case, the process is automatically booted off the +CPU after a certain amount of time. When that happens, the process goes +back to being ready to run, which means it enters the queue at the tail. 
+The time quantum we're talking about is small. Really small. This is +not your father's timesharing. For example, with the Linux kernel, the +round robin time slice is a thousand times shorter than its typical +time slice for traditional scheduling. + +A process begins life with the same scheduling policy as its parent process. +Functions described in @ref{Basic Scheduling Functions} can change it. + +Only a privileged process can set the scheduling policy of a process +that has absolute priority higher than 0. + +@node Basic Scheduling Functions +@subsection Basic Scheduling Functions + +This section describes functions in @theglibc{} for setting the +absolute priority and scheduling policy of a process. + +@strong{Portability Note:} On systems that have the functions in this +section, the macro _POSIX_PRIORITY_SCHEDULING is defined in +@file{<unistd.h>}. + +For the case that the scheduling policy is traditional scheduling, more +functions to fine tune the scheduling are in @ref{Traditional Scheduling}. + +Don't try to make too much out of the naming and structure of these +functions. They don't match the concepts described in this manual +because the functions are as defined by POSIX.1b, but the implementation +on systems that use @theglibc{} is the inverse of what the POSIX +structure contemplates. The POSIX scheme assumes that the primary +scheduling parameter is the scheduling policy and that the priority +value, if any, is a parameter of the scheduling policy. In the +implementation, though, the priority value is king and the scheduling +policy, if anything, only fine tunes the effect of that priority. + +The symbols in this section are declared by including file @file{sched.h}. + +@comment sched.h +@comment POSIX +@deftp {Data Type} {struct sched_param} +This structure describes an absolute priority. 
+@table @code +@item int sched_priority +absolute priority value +@end table +@end deftp + +@comment sched.h +@comment POSIX +@deftypefun int sched_setscheduler (pid_t @var{pid}, int @var{policy}, const struct sched_param *@var{param}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function sets both the absolute priority and the scheduling policy +for a process. + +It assigns the absolute priority value given by @var{param} and the +scheduling policy @var{policy} to the process with Process ID @var{pid}, +or the calling process if @var{pid} is zero. If @var{policy} is +negative, @code{sched_setscheduler} keeps the existing scheduling policy. + +The following macros represent the valid values for @var{policy}: + +@vtable @code +@item SCHED_OTHER +Traditional Scheduling +@item SCHED_FIFO +First In First Out +@item SCHED_RR +Round Robin +@end vtable + +@c The Linux kernel code (in sched.c) actually reschedules the process, +@c but it puts it at the head of the run queue, so I'm not sure just what +@c the effect is, but it must be subtle. + +On success, the return value is @code{0}. Otherwise, it is @code{-1} +and @code{errno} is set accordingly. The @code{errno} values specific +to this function are: + +@table @code +@item EPERM +@itemize @bullet +@item +The calling process does not have @code{CAP_SYS_NICE} permission and +@var{policy} is not @code{SCHED_OTHER} (or it's negative and the +existing policy is not @code{SCHED_OTHER}). + +@item +The calling process does not have @code{CAP_SYS_NICE} permission and its +owner is not the target process' owner. I.e., the effective uid of the +calling process is neither the effective nor the real uid of process +@var{pid}. +@c We need a cross reference to the capabilities section, when written. +@end itemize + +@item ESRCH +There is no process with pid @var{pid} and @var{pid} is not zero.
+ +@item EINVAL +@itemize @bullet +@item +@var{policy} does not identify an existing scheduling policy. + +@item +The absolute priority value identified by *@var{param} is outside the +valid range for the scheduling policy @var{policy} (or the existing +scheduling policy if @var{policy} is negative) or @var{param} is +null. @code{sched_get_priority_max} and @code{sched_get_priority_min} +tell you what the valid range is. + +@item +@var{pid} is negative. +@end itemize +@end table + +@end deftypefun + + +@comment sched.h +@comment POSIX +@deftypefun int sched_getscheduler (pid_t @var{pid}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function returns the scheduling policy assigned to the process with +Process ID (pid) @var{pid}, or the calling process if @var{pid} is zero. + +The return value is the scheduling policy. See +@code{sched_setscheduler} for the possible values. + +If the function fails, the return value is instead @code{-1} and +@code{errno} is set accordingly. + +The @code{errno} values specific to this function are: + +@table @code + +@item ESRCH +There is no process with pid @var{pid} and it is not zero. + +@item EINVAL +@var{pid} is negative. + +@end table + +Note that this function is not an exact mate to @code{sched_setscheduler} +because while that function sets the scheduling policy and the absolute +priority, this function gets only the scheduling policy. To get the +absolute priority, use @code{sched_getparam}. + +@end deftypefun + + +@comment sched.h +@comment POSIX +@deftypefun int sched_setparam (pid_t @var{pid}, const struct sched_param *@var{param}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function sets a process' absolute priority. + +It is functionally identical to @code{sched_setscheduler} with +@var{policy} = @code{-1}. + +@c in fact, that's how it's implemented in Linux. 
+ +@end deftypefun + +@comment sched.h +@comment POSIX +@deftypefun int sched_getparam (pid_t @var{pid}, struct sched_param *@var{param}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function returns a process' absolute priority. + +@var{pid} is the Process ID (pid) of the process whose absolute priority +you want to know. + +@var{param} is a pointer to a structure in which the function stores the +absolute priority of the process. + +On success, the return value is @code{0}. Otherwise, it is @code{-1} +and @code{errno} is set accordingly. The @code{errno} values specific +to this function are: + +@table @code + +@item ESRCH +There is no process with pid @var{pid} and it is not zero. + +@item EINVAL +@var{pid} is negative. + +@end table + +@end deftypefun + + +@comment sched.h +@comment POSIX +@deftypefun int sched_get_priority_min (int @var{policy}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function returns the lowest absolute priority value that is +allowable for a process with scheduling policy @var{policy}. + +On Linux, it is 0 for SCHED_OTHER and 1 for everything else. + +On success, the return value is that priority value. Otherwise, it is @code{-1} +and @code{errno} is set accordingly. The @code{errno} values specific +to this function are: + +@table @code +@item EINVAL +@var{policy} does not identify an existing scheduling policy. +@end table + +@end deftypefun + +@comment sched.h +@comment POSIX +@deftypefun int sched_get_priority_max (int @var{policy}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function returns the highest absolute priority value that is +allowable for a process with scheduling policy @var{policy}. + +On Linux, it is 0 for SCHED_OTHER and 99 for everything else. + +On success, the return value is that priority value. Otherwise, it is @code{-1} +and @code{errno} is set accordingly.
The @code{errno} values specific +to this function are: + +@table @code +@item EINVAL +@var{policy} does not identify an existing scheduling policy. +@end table + +@end deftypefun + +@comment sched.h +@comment POSIX +@deftypefun int sched_rr_get_interval (pid_t @var{pid}, struct timespec *@var{interval}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +This function returns the length of the quantum (time slice) used with +the Round Robin scheduling policy, if it is used, for the process with +Process ID @var{pid}. + +It returns the length of time as @var{interval}. +@c We need a cross-reference to where timespec is explained. But that +@c section doesn't exist yet, and the time chapter needs to be slightly +@c reorganized so there is a place to put it (which will be right next +@c to timeval, which is presently misplaced). 2000.05.07. + +With a Linux kernel, the round robin time slice is always 150 +microseconds, and @var{pid} need not even be a real pid. + +The return value is @code{0} on success and in the pathological case +that it fails, the return value is @code{-1} and @code{errno} is set +accordingly. There is nothing specific that can go wrong with this +function, so there are no specific @code{errno} values. + +@end deftypefun + +@comment sched.h +@comment POSIX +@deftypefun int sched_yield (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on Linux; alias to swtch on HURD. + +This function voluntarily gives up the process' claim on the CPU. + +Technically, @code{sched_yield} causes the calling process to be made +immediately ready to run (as opposed to running, which is what it was +before). This means that if it has absolute priority higher than 0, it +gets pushed onto the tail of the queue of processes that share its +absolute priority and are ready to run, and it will run again when its +turn next arrives. 
If its absolute priority is 0, it is more +complicated, but still has the effect of yielding the CPU to other +processes. + +If there are no other processes that share the calling process' absolute +priority, this function doesn't have any effect. + +To the extent that the containing program is oblivious to what other +processes in the system are doing and how fast it executes, this +function appears as a no-op. + +The return value is @code{0} on success and in the pathological case +that it fails, the return value is @code{-1} and @code{errno} is set +accordingly. There is nothing specific that can go wrong with this +function, so there are no specific @code{errno} values. + +@end deftypefun + +@node Traditional Scheduling +@subsection Traditional Scheduling +@cindex scheduling, traditional + +This section is about the scheduling among processes whose absolute +priority is 0. When the system hands out the scraps of CPU time that +are left over after the processes with higher absolute priority have +taken all they want, the scheduling described herein determines who +among the great unwashed processes gets them. + +@menu +* Traditional Scheduling Intro:: +* Traditional Scheduling Functions:: +@end menu + +@node Traditional Scheduling Intro +@subsubsection Introduction To Traditional Scheduling + +Long before there was absolute priority (See @ref{Absolute Priority}), +Unix systems were scheduling the CPU using this system. When POSIX came +in like the Romans and imposed absolute priorities to accommodate the +needs of realtime processing, it left the indigenous Absolute Priority +Zero processes to govern themselves by their own familiar scheduling +policy. + +Indeed, absolute priorities higher than zero are not available on many +systems today and are not typically used when they are, being intended +mainly for computers that do realtime processing. So this section +describes the only scheduling many programmers need to be concerned +about. 
+ +But just to be clear about the scope of this scheduling: Any time a +process with an absolute priority of 0 and a process with an absolute +priority higher than 0 are ready to run at the same time, the one with +absolute priority 0 does not run. If it's already running when the +higher priority ready-to-run process comes into existence, it stops +immediately. + +In addition to its absolute priority of zero, every process has another +priority, which we will refer to as "dynamic priority" because it changes +over time. The dynamic priority is meaningless for processes with +an absolute priority higher than zero. + +The dynamic priority sometimes determines who gets the next turn on the +CPU. Sometimes it determines how long turns last. Sometimes it +determines whether a process can kick another off the CPU. + +In Linux, the value is a combination of these things, but mostly it +just determines the length of the time slice. The higher a process' +dynamic priority, the longer a shot it gets on the CPU when it gets one. +If it doesn't use up its time slice before giving up the CPU to do +something like wait for I/O, it is favored for getting the CPU back when +it's ready for it, to finish out its time slice. Other than that, +selection of processes for new time slices is basically round robin. +But the scheduler does throw a bone to the low priority processes: A +process' dynamic priority rises every time it is snubbed in the +scheduling process. In Linux, even the fat kid gets to play. + +The fluctuation of a process' dynamic priority is regulated by another +value: The ``nice'' value. The nice value is an integer, usually in the +range -20 to 20, and represents an upper limit on a process' dynamic +priority. The higher the nice number, the lower that limit. 
+ +On a typical Linux system, for example, a process with a nice value of +20 can get only 10 milliseconds on the CPU at a time, whereas a process +with a nice value of -20 can achieve a high enough priority to get 400 +milliseconds. + +The idea of the nice value is deferential courtesy. In the beginning, +in the Unix garden of Eden, all processes shared equally in the bounty +of the computer system. But not all processes really need the same +share of CPU time, so the nice value gave a courteous process the +ability to refuse its equal share of CPU time that others might prosper. +Hence, the higher a process' nice value, the nicer the process is. +(Then a snake came along and offered some process a negative nice value +and the system became the crass resource allocation system we know +today.) + +Dynamic priorities tend upward and downward with an objective of +smoothing out allocation of CPU time and giving quick response time to +infrequent requests. But they never exceed their nice limits, so on a +heavily loaded CPU, the nice value effectively determines how fast a +process runs. + +In keeping with the socialistic heritage of Unix process priority, a +process begins life with the same nice value as its parent process and +can raise it at will. A process can also raise the nice value of any +other process owned by the same user (or effective user). But only a +privileged process can lower its nice value. A privileged process can +also raise or lower another process' nice value. + +@glibcadj{} functions for getting and setting nice values are described in +@ref{Traditional Scheduling Functions}. + +@node Traditional Scheduling Functions +@subsubsection Functions For Traditional Scheduling + +@pindex sys/resource.h +This section describes how you can read and set the nice value of a +process. All these symbols are declared in @file{sys/resource.h}. 
+ +The function and macro names are defined by POSIX, and refer to +``priority,'' but the functions actually have to do with nice values, as +the terms are used both in the manual and POSIX. + +The range of valid nice values depends on the kernel, but typically it +runs from @code{-20} to @code{20}. A lower nice value corresponds to +higher priority for the process. These constants describe the range of +priority values: + +@vtable @code +@comment sys/resource.h +@comment BSD +@item PRIO_MIN +The lowest valid nice value. + +@comment sys/resource.h +@comment BSD +@item PRIO_MAX +The highest valid nice value. +@end vtable + +@comment sys/resource.h +@comment BSD, POSIX +@deftypefun int getpriority (int @var{class}, int @var{id}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on UNIX. On HURD, calls _hurd_priority_which_map. +Return the nice value of a set of processes; @var{class} and @var{id} +specify which ones (see below). If the processes specified do not all +have the same nice value, this returns the lowest value that any of them +has. + +On success, the return value is the nice value. Otherwise, it is @code{-1} +and @code{errno} is set accordingly. The @code{errno} values specific +to this function are: + +@table @code +@item ESRCH +The combination of @var{class} and @var{id} does not match any existing +process. + +@item EINVAL +The value of @var{class} is not valid. +@end table + +If the return value is @code{-1}, it could indicate failure, or it could +be the nice value. The only way to make certain is to set @code{errno = +0} before calling @code{getpriority}, then use @code{errno != 0} +afterward as the criterion for failure. +@end deftypefun + +@comment sys/resource.h +@comment BSD, POSIX +@deftypefun int setpriority (int @var{class}, int @var{id}, int @var{niceval}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on UNIX. On HURD, calls _hurd_priority_which_map. 
+Set the nice value of a set of processes to @var{niceval}; @var{class} +and @var{id} specify which ones (see below). + +The return value is @code{0} on success, and @code{-1} on +failure. The following @code{errno} error conditions are possible for +this function: + +@table @code +@item ESRCH +The combination of @var{class} and @var{id} does not match any existing +process. + +@item EINVAL +The value of @var{class} is not valid. + +@item EPERM +The call would set the nice value of a process which is owned by a different +user than the calling process (i.e., the target process' real or effective +uid does not match the calling process' effective uid) and the calling +process does not have @code{CAP_SYS_NICE} permission. + +@item EACCES +The call would lower the process' nice value and the process does not have +@code{CAP_SYS_NICE} permission. +@end table + +@end deftypefun + +The arguments @var{class} and @var{id} together specify a set of +processes in which you are interested. These are the possible values of +@var{class}: + +@vtable @code +@comment sys/resource.h +@comment BSD +@item PRIO_PROCESS +One particular process. The argument @var{id} is a process ID (pid). + +@comment sys/resource.h +@comment BSD +@item PRIO_PGRP +All the processes in a particular process group. The argument @var{id} is +a process group ID (pgid). + +@comment sys/resource.h +@comment BSD +@item PRIO_USER +All the processes owned by a particular user (i.e., whose real uid +indicates the user). The argument @var{id} is a user ID (uid). +@end vtable + +If the argument @var{id} is 0, it stands for the calling process, its +process group, or its owner (real uid), according to @var{class}. + +@comment unistd.h +@comment BSD +@deftypefun int nice (int @var{increment}) +@safety{@prelim{}@mtunsafe{@mtasurace{:setpriority}}@asunsafe{}@acsafe{}} +@c Calls getpriority before and after setpriority, using the result of +@c the first call to compute the argument for setpriority. 
This creates +@c a window for a concurrent setpriority (or nice) call to be lost or +@c exhibit surprising behavior. +Increment the nice value of the calling process by @var{increment}. +The return value is the new nice value on success, and @code{-1} on +failure. In the case of failure, @code{errno} will be set to the +same values as for @code{setpriority}. + + +Here is an equivalent definition of @code{nice}: + +@smallexample +int +nice (int increment) +@{ + int result, old = getpriority (PRIO_PROCESS, 0); + result = setpriority (PRIO_PROCESS, 0, old + increment); + if (result != -1) + return old + increment; + else + return -1; +@} +@end smallexample +@end deftypefun + + +@node CPU Affinity +@subsection Limiting execution to certain CPUs + +On a multi-processor system the operating system usually distributes +the different processes which are runnable on all available CPUs in a +way which allows the system to work most efficiently. Which processes +and threads run can to some extent be controlled with the scheduling +functionality described in the last sections. But which CPU finally +executes which process or thread is not covered. + +There are a number of reasons why a program might want to have control +over this aspect of the system as well: + +@itemize @bullet +@item +One thread or process is responsible for absolutely critical work +which under no circumstances must be interrupted or hindered from +making progress by other processes or threads using CPU resources. In +this case the special process would be confined to a CPU which no +other process or thread is allowed to use. + +@item +The access to certain resources (RAM, I/O ports) has different costs +from different CPUs. This is the case in NUMA (Non-Uniform Memory +Architecture) machines. Preferably memory should be accessed locally +but this requirement is usually not visible to the scheduler. 
+Therefore forcing a process or thread to the CPUs which have local +access to the most-used memory helps to significantly boost the +performance. + +@item +In controlled runtimes resource allocation and book-keeping work (for +instance garbage collection) is performed local to processors. This +can help to reduce locking costs if the resources do not have to be +protected from concurrent accesses from different processors. +@end itemize + +The POSIX standard up to this date is of not much help to solve this +problem. The Linux kernel provides a set of interfaces to allow +specifying @emph{affinity sets} for a process. The scheduler will +schedule the thread or process on CPUs specified by the affinity +masks. The interfaces which @theglibc{} defines follow to some +extent the Linux kernel interface. + +@comment sched.h +@comment GNU +@deftp {Data Type} cpu_set_t +This data set is a bitset where each bit represents a CPU. How the +system's CPUs are mapped to bits in the bitset is system dependent. +The data type has a fixed size; in the unlikely case that the number +of bits is not sufficient to describe the CPUs of the system a +different interface has to be used. + +This type is a GNU extension and is defined in @file{sched.h}. +@end deftp + +To manipulate the bitset, to set and reset bits, a number of macros are +defined. Some of the macros take a CPU number as a parameter. Here +it is important to never exceed the size of the bitset. The following +macro specifies the number of bits in the @code{cpu_set_t} bitset. + +@comment sched.h +@comment GNU +@deftypevr Macro int CPU_SETSIZE +The value of this macro is the maximum number of CPUs which can be +handled with a @code{cpu_set_t} object. +@end deftypevr + +The type @code{cpu_set_t} should be considered opaque; all +manipulation should happen via the next four macros. 
+ +@comment sched.h +@comment GNU +@deftypefn Macro void CPU_ZERO (cpu_set_t *@var{set}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c CPU_ZERO ok +@c __CPU_ZERO_S ok +@c memset dup ok +This macro initializes the CPU set @var{set} to be the empty set. + +This macro is a GNU extension and is defined in @file{sched.h}. +@end deftypefn + +@comment sched.h +@comment GNU +@deftypefn Macro void CPU_SET (int @var{cpu}, cpu_set_t *@var{set}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c CPU_SET ok +@c __CPU_SET_S ok +@c __CPUELT ok +@c __CPUMASK ok +This macro adds @var{cpu} to the CPU set @var{set}. + +The @var{cpu} parameter must not have side effects since it is +evaluated more than once. + +This macro is a GNU extension and is defined in @file{sched.h}. +@end deftypefn + +@comment sched.h +@comment GNU +@deftypefn Macro void CPU_CLR (int @var{cpu}, cpu_set_t *@var{set}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c CPU_CLR ok +@c __CPU_CLR_S ok +@c __CPUELT dup ok +@c __CPUMASK dup ok +This macro removes @var{cpu} from the CPU set @var{set}. + +The @var{cpu} parameter must not have side effects since it is +evaluated more than once. + +This macro is a GNU extension and is defined in @file{sched.h}. +@end deftypefn + +@comment sched.h +@comment GNU +@deftypefn Macro int CPU_ISSET (int @var{cpu}, const cpu_set_t *@var{set}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c CPU_ISSET ok +@c __CPU_ISSET_S ok +@c __CPUELT dup ok +@c __CPUMASK dup ok +This macro returns a nonzero value (true) if @var{cpu} is a member +of the CPU set @var{set}, and zero (false) otherwise. + +The @var{cpu} parameter must not have side effects since it is +evaluated more than once. + +This macro is a GNU extension and is defined in @file{sched.h}. +@end deftypefn + + +CPU bitsets can be constructed from scratch or the currently installed +affinity mask can be retrieved from the system. 
+ +@comment sched.h +@comment GNU +@deftypefun int sched_getaffinity (pid_t @var{pid}, size_t @var{cpusetsize}, cpu_set_t *@var{cpuset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Wrapped syscall to zero out past the kernel cpu set size; Linux +@c only. + +This function stores the CPU affinity mask for the process or thread +with the ID @var{pid} in the @var{cpusetsize} bytes long bitmap +pointed to by @var{cpuset}. If successful, the function always +initializes all bits in the @code{cpu_set_t} object and returns zero. + +If @var{pid} does not correspond to a process or thread on the system +or the function fails for some other reason, it returns @code{-1} +and @code{errno} is set to represent the error condition. + +@table @code +@item ESRCH +No process or thread with the given ID found. + +@item EFAULT +The pointer @var{cpuset} does not point to a valid object. +@end table + +This function is a GNU extension and is declared in @file{sched.h}. +@end deftypefun + +Note that it is not portably possible to use this information to +retrieve the information for different POSIX threads. A separate +interface must be provided for that. + +@comment sched.h +@comment GNU +@deftypefun int sched_setaffinity (pid_t @var{pid}, size_t @var{cpusetsize}, const cpu_set_t *@var{cpuset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Wrapped syscall to detect attempts to set bits past the kernel cpu +@c set size; Linux only. + +This function installs the @var{cpusetsize} bytes long affinity mask +pointed to by @var{cpuset} for the process or thread with the ID @var{pid}. +If successful the function returns zero and the scheduler will in the future +take the affinity information into account. + +If the function fails it will return @code{-1} and @code{errno} is set +to the error code: + +@table @code +@item ESRCH +No process or thread with the given ID found. + +@item EFAULT +The pointer @var{cpuset} does not point to a valid object. 
+ +@item EINVAL +The bitset is not valid. This might mean that the affinity set might +not leave a processor for the process or thread to run on. +@end table + +This function is a GNU extension and is declared in @file{sched.h}. +@end deftypefun + + +@node Memory Resources +@section Querying memory available resources + +The amount of memory available in the system and the way it is organized +determines oftentimes the way programs can and have to work. For +functions like @code{mmap} it is necessary to know about the size of +individual memory pages and knowing how much memory is available enables +a program to select appropriate sizes for, say, caches. Before we get +into these details a few words about memory subsystems in traditional +Unix systems will be given. + +@menu +* Memory Subsystem:: Overview about traditional Unix memory handling. +* Query Memory Parameters:: How to get information about the memory + subsystem? +@end menu + +@node Memory Subsystem +@subsection Overview about traditional Unix memory handling + +@cindex address space +@cindex physical memory +@cindex physical address +Unix systems normally provide processes virtual address spaces. This +means that the addresses of the memory regions do not have to correspond +directly to the addresses of the actual physical memory which stores the +data. An extra level of indirection is introduced which translates +virtual addresses into physical addresses. This is normally done by the +hardware of the processor. + +@cindex shared memory +Using a virtual address space has several advantages. The most important +is process isolation. The different processes running on the system +cannot interfere directly with each other. No process can write into +the address space of another process (except when shared memory is used +but then it is wanted and controlled). + +Another advantage of virtual memory is that the address space the +processes see can actually be larger than the physical memory available. 
+The physical memory can be extended by storage on an external media +where the content of currently unused memory regions is stored. The +address translation can then intercept accesses to these memory regions +and make memory content available again by loading the data back into +memory. This concept makes it necessary that programs which have to use +lots of memory know the difference between available virtual address +space and available physical memory. If the working set of virtual +memory of all the processes is larger than the available physical memory +the system will slow down dramatically due to constant swapping of +memory content from the memory to the storage media and back. This is +called ``thrashing''. +@cindex thrashing + +@cindex memory page +@cindex page, memory +A final aspect of virtual memory which is important and follows from +what is said in the last paragraph is the granularity of the virtual +address space handling. When we said that the virtual address handling +stores memory content externally it cannot do this on a byte-by-byte +basis. The administrative overhead does not allow this (leaving alone +the processor hardware). Instead several thousand bytes are handled +together and form a @dfn{page}. The size of each page is always a power +of two bytes. The smallest page size in use today is 4096, with 8192, +16384, and 65536 being other popular sizes. + +@node Query Memory Parameters +@subsection How to get information about the memory subsystem? + +The page size of the virtual memory the process sees is essential to +know in several situations. Some programming interfaces (e.g., +@code{mmap}, @pxref{Memory-mapped I/O}) require the user to provide +information adjusted to the page size. In the case of @code{mmap} it is +necessary to provide a length argument which is a multiple of the page +size. Another place where the knowledge about the page size is useful +is in memory allocation. 
If one allocates pieces of memory in larger +chunks which are then subdivided by the application code it is useful to +adjust the size of the larger blocks to the page size. If the total +memory requirement for the block is close (but not larger) to a multiple +of the page size the kernel's memory handling can work more effectively +since it only has to allocate memory pages which are fully used. (To do +this optimization it is necessary to know a bit about the memory +allocator which will require a bit of memory itself for each block and +this overhead must not push the total size over the page size multiple.) + +The page size traditionally was a compile time constant. But recent +development of processors changed this. Processors now support +different page sizes and they can possibly even vary among different +processes on the same system. Therefore the system should be queried at +runtime about the current page size and no assumptions (except about it +being a power of two) should be made. + +@vindex _SC_PAGESIZE +The correct interface to query about the page size is @code{sysconf} +(@pxref{Sysconf Definition}) with the parameter @code{_SC_PAGESIZE}. +There is a much older interface available, too. + +@comment unistd.h +@comment BSD +@deftypefun int getpagesize (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Obtained from the aux vec at program startup time. GNU/Linux/m68k is +@c the exception, with the possibility of a syscall. +The @code{getpagesize} function returns the page size of the process. +This value is fixed for the runtime of the process but can vary in +different runs of the application. + +The function is declared in @file{unistd.h}. +@end deftypefun + +Widely available on @w{System V} derived systems is a method to get +information about the physical memory the system has. 
The call + +@vindex _SC_PHYS_PAGES +@cindex sysconf +@smallexample + sysconf (_SC_PHYS_PAGES) +@end smallexample + +@noindent +returns the total number of pages of physical memory the system has. +This does not mean all this memory is available. This information can +be found using + +@vindex _SC_AVPHYS_PAGES +@cindex sysconf +@smallexample + sysconf (_SC_AVPHYS_PAGES) +@end smallexample + +These two values help to optimize applications. The value returned for +@code{_SC_AVPHYS_PAGES} is the amount of memory the application can use +without hindering any other process (given that no other process +increases its memory usage). The value returned for +@code{_SC_PHYS_PAGES} is more or less a hard limit for the working set. +If all applications together constantly use more than that amount of +memory the system is in trouble. + +In addition to the methods already described, @theglibc{} provides two +functions to get this information. They are declared in the file +@file{sys/sysinfo.h}. Programmers should prefer to use the +@code{sysconf} method described above. + +@comment sys/sysinfo.h +@comment GNU +@deftypefun {long int} get_phys_pages (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c This fopens a /proc file and scans it for the requested information. +The @code{get_phys_pages} function returns the total number of pages of +physical memory the system has. To get the amount of memory this number has to +be multiplied by the page size. + +This function is a GNU extension. +@end deftypefun + +@comment sys/sysinfo.h +@comment GNU +@deftypefun {long int} get_avphys_pages (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +The @code{get_avphys_pages} function returns the number of available pages of +physical memory the system has. To get the amount of memory this number has to +be multiplied by the page size. + +This function is a GNU extension. 
+@end deftypefun + +@node Processor Resources +@section Learn about the processors available + +The use of threads or processes with shared memory allows an application +to take advantage of all the processing power a system can provide. If +the task can be parallelized the optimal way to write an application is +to have at any time as many processes running as there are processors. +To determine the number of processors available to the system one can +run + +@vindex _SC_NPROCESSORS_CONF +@cindex sysconf +@smallexample + sysconf (_SC_NPROCESSORS_CONF) +@end smallexample + +@noindent +which returns the number of processors the operating system configured. +But it might be possible for the operating system to disable individual +processors and so the call + +@vindex _SC_NPROCESSORS_ONLN +@cindex sysconf +@smallexample + sysconf (_SC_NPROCESSORS_ONLN) +@end smallexample + +@noindent +returns the number of processors which are currently online (i.e., +available). + +For these two pieces of information @theglibc{} also provides +functions to get the information directly. The functions are declared +in @file{sys/sysinfo.h}. + +@comment sys/sysinfo.h +@comment GNU +@deftypefun int get_nprocs_conf (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c This function reads from /sys using dir streams (single user, so +@c no @mtasurace issue), and on some arches, from /proc using streams. +The @code{get_nprocs_conf} function returns the number of processors the +operating system configured. + +This function is a GNU extension. +@end deftypefun + +@comment sys/sysinfo.h +@comment GNU +@deftypefun int get_nprocs (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +@c This function reads from /proc using file descriptor I/O. +The @code{get_nprocs} function returns the number of available processors. + +This function is a GNU extension. 
+@end deftypefun + +@cindex load average +Before starting more threads it should be checked whether the processors +are not already overused. Unix systems calculate something called the +@dfn{load average}. This is a number indicating how many processes were +running. This number is an average over different periods of time +(normally 1, 5, and 15 minutes). + +@comment stdlib.h +@comment BSD +@deftypefun int getloadavg (double @var{loadavg}[], int @var{nelem}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +@c Calls host_info on HURD; on Linux, opens /proc/loadavg, reads from +@c it, closes it, without cancellation point, and calls strtod_l with +@c the C locale to convert the strings to doubles. +This function gets the 1, 5 and 15 minute load averages of the +system. The values are placed in @var{loadavg}. @code{getloadavg} will +place at most @var{nelem} elements into the array but never more than +three elements. The return value is the number of elements written to +@var{loadavg}, or -1 on error. + +This function is declared in @file{stdlib.h}. +@end deftypefun diff --git a/REORG.TODO/manual/search.texi b/REORG.TODO/manual/search.texi new file mode 100644 index 0000000000..1d9628d6e3 --- /dev/null +++ b/REORG.TODO/manual/search.texi @@ -0,0 +1,637 @@ +@node Searching and Sorting, Pattern Matching, Message Translation, Top +@c %MENU% General searching and sorting functions +@chapter Searching and Sorting + +This chapter describes functions for searching and sorting arrays of +arbitrary objects. You pass the appropriate comparison function to be +applied as an argument, along with the size of the objects in the array +and the total number of elements. + +@menu +* Comparison Functions:: Defining how to compare two objects. + Since the sort and search facilities + are general, you have to specify the + ordering. +* Array Search Function:: The @code{bsearch} function. +* Array Sort Function:: The @code{qsort} function. 
+* Search/Sort Example:: An example program. +* Hash Search Function:: The @code{hsearch} function. +* Tree Search Function:: The @code{tsearch} function. +@end menu + +@node Comparison Functions +@section Defining the Comparison Function +@cindex Comparison Function + +In order to use the sorted array library functions, you have to describe +how to compare the elements of the array. + +To do this, you supply a comparison function to compare two elements of +the array. The library will call this function, passing as arguments +pointers to two array elements to be compared. Your comparison function +should return a value the way @code{strcmp} (@pxref{String/Array +Comparison}) does: negative if the first argument is ``less'' than the +second, zero if they are ``equal'', and positive if the first argument +is ``greater''. + +Here is an example of a comparison function which works with an array of +numbers of type @code{double}: + +@smallexample +int +compare_doubles (const void *a, const void *b) +@{ + const double *da = (const double *) a; + const double *db = (const double *) b; + + return (*da > *db) - (*da < *db); +@} +@end smallexample + +The header file @file{stdlib.h} defines a name for the data type of +comparison functions. This type is a GNU extension. + +@comment stdlib.h +@comment GNU +@tindex comparison_fn_t +@smallexample +int comparison_fn_t (const void *, const void *); +@end smallexample + +@node Array Search Function +@section Array Search Function +@cindex search function (for arrays) +@cindex binary search function (for arrays) +@cindex array search function + +Generally searching for a specific element in an array means that +potentially all elements must be checked. @Theglibc{} contains +functions to perform linear search. The prototypes for the following +two functions can be found in @file{search.h}. 
+ +@comment search.h +@comment SVID +@deftypefun {void *} lfind (const void *@var{key}, const void *@var{base}, size_t *@var{nmemb}, size_t @var{size}, comparison_fn_t @var{compar}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{lfind} function searches in the array with @code{*@var{nmemb}} +elements of @var{size} bytes pointed to by @var{base} for an element +which matches the one pointed to by @var{key}. The function pointed to +by @var{compar} is used to decide whether two elements match. + +The return value is a pointer to the matching element in the array +starting at @var{base} if it is found. If no matching element is +available @code{NULL} is returned. + +The mean runtime of this function is @code{*@var{nmemb}}/2. This +function should only be used if elements often get added to or deleted from +the array in which case it might not be useful to sort the array before +searching. +@end deftypefun + +@comment search.h +@comment SVID +@deftypefun {void *} lsearch (const void *@var{key}, void *@var{base}, size_t *@var{nmemb}, size_t @var{size}, comparison_fn_t @var{compar}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c A signal handler that interrupted an insertion and performed an +@c insertion itself would leave the array in a corrupt state (e.g. one +@c new element initialized twice, with parts of both initializations +@c prevailing, and another uninitialized element), but this is just a +@c special case of races on user-controlled objects, that have to be +@c avoided by users. + +@c In case of cancellation, we know the array won't be left in a corrupt +@c state; the new element is initialized before the element count is +@c incremented, and the compiler can't reorder these operations because +@c it can't know that they don't alias. So, we'll either cancel after +@c the increment and the initialization are both complete, or the +@c increment won't have taken place, and so how far the initialization +@c got doesn't matter. 
+The @code{lsearch} function is similar to the @code{lfind} function. It +searches the given array for an element and returns it if found. The +difference is that if no matching element is found the @code{lsearch} +function adds the object pointed to by @var{key} (with a size of +@var{size} bytes) at the end of the array and it increments the value of +@code{*@var{nmemb}} to reflect this addition. + +This means for the caller that, unless it is certain that the array +already contains the element being searched for, the memory allocated +for the array starting at @var{base} must have room for at least +@var{size} more bytes. If the element is known to be in the array it is +better to use @code{lfind}; a caller of @code{lsearch} must therefore +always provide room for one more element. +@end deftypefun + +To search a sorted array for an element matching the key, use the +@code{bsearch} function. The prototype for this function is in +the header file @file{stdlib.h}. +@pindex stdlib.h + +@comment stdlib.h +@comment ISO +@deftypefun {void *} bsearch (const void *@var{key}, const void *@var{array}, size_t @var{count}, size_t @var{size}, comparison_fn_t @var{compare}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{bsearch} function searches the sorted array @var{array} for an object +that is equivalent to @var{key}. The array contains @var{count} elements, +each of which is of size @var{size} bytes. + +The @var{compare} function is used to perform the comparison. This +function is called with two pointer arguments and should return an +integer less than, equal to, or greater than zero corresponding to +whether its first argument is considered less than, equal to, or greater +than its second argument. The elements of the @var{array} must already +be sorted in ascending order according to this comparison function. + +The return value is a pointer to the matching array element, or a null +pointer if no match is found.
If the array contains more than one element +that matches, the one that is returned is unspecified. + +This function derives its name from the fact that it is implemented +using the binary search algorithm. +@end deftypefun + +@node Array Sort Function +@section Array Sort Function +@cindex sort function (for arrays) +@cindex quick sort function (for arrays) +@cindex array sort function + +To sort an array using an arbitrary comparison function, use the +@code{qsort} function. The prototype for this function is in +@file{stdlib.h}. +@pindex stdlib.h + +@comment stdlib.h +@comment ISO +@deftypefun void qsort (void *@var{array}, size_t @var{count}, size_t @var{size}, comparison_fn_t @var{compare}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@acucorrupt{}}} +The @code{qsort} function sorts the array @var{array}. The array +contains @var{count} elements, each of which is of size @var{size}. + +The @var{compare} function is used to perform the comparison on the +array elements. This function is called with two pointer arguments and +should return an integer less than, equal to, or greater than zero +corresponding to whether its first argument is considered less than, +equal to, or greater than its second argument. + +@cindex stable sorting +@strong{Warning:} If two objects compare as equal, their order after +sorting is unpredictable. That is to say, the sorting is not stable. +This can make a difference when the comparison considers only part of +the elements. Two elements with the same sort key may differ in other +respects. + +Although the object addresses passed to the comparison function lie +within the array, they need not correspond with the original locations +of those objects because the sorting algorithm may swap around objects +in the array before making some comparisons. The only way to perform +a stable sort with @code{qsort} is to first augment the objects with a +monotonic counter of some kind. 
+ +Here is a simple example of sorting an array of doubles in numerical +order, using the comparison function defined above (@pxref{Comparison +Functions}): + +@smallexample +@{ + double *array; + int size; + @dots{} + qsort (array, size, sizeof (double), compare_doubles); +@} +@end smallexample + +The @code{qsort} function derives its name from the fact that it was +originally implemented using the ``quick sort'' algorithm. + +The implementation of @code{qsort} in this library might not be an +in-place sort and might thereby use an extra amount of memory to store +the array. +@end deftypefun + +@node Search/Sort Example +@section Searching and Sorting Example + +Here is an example showing the use of @code{qsort} and @code{bsearch} +with an array of structures. The objects in the array are sorted +by comparing their @code{name} fields with the @code{strcmp} function. +Then, we can look up individual objects based on their names. + +@comment This example is dedicated to the memory of Jim Henson. RIP. +@smallexample +@include search.c.texi +@end smallexample + +@cindex Kermit the frog +The output from this program looks like: + +@smallexample +Kermit, the frog +Piggy, the pig +Gonzo, the whatever +Fozzie, the bear +Sam, the eagle +Robin, the frog +Animal, the animal +Camilla, the chicken +Sweetums, the monster +Dr. Strangepork, the pig +Link Hogthrob, the pig +Zoot, the human +Dr. Bunsen Honeydew, the human +Beaker, the human +Swedish Chef, the human + +Animal, the animal +Beaker, the human +Camilla, the chicken +Dr. Bunsen Honeydew, the human +Dr. Strangepork, the pig +Fozzie, the bear +Gonzo, the whatever +Kermit, the frog +Link Hogthrob, the pig +Piggy, the pig +Robin, the frog +Sam, the eagle +Swedish Chef, the human +Sweetums, the monster +Zoot, the human + +Kermit, the frog +Gonzo, the whatever +Couldn't find Janice. +@end smallexample + + +@node Hash Search Function +@section The @code{hsearch} function. 
+ +The functions mentioned so far in this chapter are for searching in a sorted +or unsorted array. There are other methods to organize information +which later should be searched. The costs of insert, delete and search +differ. One possible implementation is using hashing tables. +The following functions are declared in the header file @file{search.h}. + +@comment search.h +@comment SVID +@deftypefun int hcreate (size_t @var{nel}) +@safety{@prelim{}@mtunsafe{@mtasurace{:hsearch}}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c hcreate @mtasurace:hsearch @ascuheap @acucorrupt @acsmem +@c hcreate_r dup @mtsrace:htab @ascuheap @acucorrupt @acsmem +The @code{hcreate} function creates a hashing table which can contain at +least @var{nel} elements. There is no possibility to grow this table so +it is necessary to choose the value for @var{nel} wisely. The method +used to implement this function might make it necessary to make the +number of elements in the hashing table larger than the expected maximal +number of elements. Hashing tables usually work inefficiently if they are +filled 80% or more. The constant access time guaranteed by hashing can +only be achieved if few collisions exist. See Knuth's ``The Art of +Computer Programming, Volume 3: Sorting and Searching'' for more +information. + +The weakest aspect of this function is that there can be at most one +hashing table used through the whole program. The table is allocated +in local memory out of control of the programmer. As an extension @theglibc{} +provides an additional set of functions with a reentrant +interface which provides a similar interface but which allows keeping +arbitrarily many hashing tables. + +It is possible to use more than one hashing table in the program run if +the former table is first destroyed by a call to @code{hdestroy}. + +The function returns a non-zero value if successful. If it returns zero, +something went wrong.
This could either mean there is already a hashing +table in use or the program ran out of memory. +@end deftypefun + +@comment search.h +@comment SVID +@deftypefun void hdestroy (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:hsearch}}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c hdestroy @mtasurace:hsearch @ascuheap @acucorrupt @acsmem +@c hdestroy_r dup @mtsrace:htab @ascuheap @acucorrupt @acsmem +The @code{hdestroy} function can be used to free all the resources +allocated in a previous call of @code{hcreate}. After a call to this +function it is again possible to call @code{hcreate} and allocate a new +table with possibly different size. + +It is important to remember that the elements contained in the hashing +table at the time @code{hdestroy} is called are @emph{not} freed by this +function. It is the responsibility of the program code to free those +strings (if necessary at all). Freeing all the element memory is not +possible without extra, separately kept information since there is no +function to iterate through all available elements in the hashing table. +If it is really necessary to free a table and all elements the +programmer has to keep a list of all table elements and before calling +@code{hdestroy} s/he has to free all element's data using this list. +This is a very unpleasant mechanism and it also shows that this kind of +hashing table is mainly meant for tables which are created once and +used until the end of the program run. +@end deftypefun + +Entries of the hashing table and keys for the search are defined using +this type: + +@deftp {Data type} {struct ENTRY} +Both elements of this structure are pointers to zero-terminated strings. +This is a limiting restriction of the functionality of the +@code{hsearch} functions. They can only be used for data sets which use +the NUL character always and solely to terminate the records. It is not +possible to handle general binary data. 
+ +@table @code +@item char *key +Pointer to a zero-terminated string of characters describing the key for +the search or the element in the hashing table. +@item char *data +Pointer to a zero-terminated string of characters describing the data. +If the functions will be called only for searching an existing entry +this element might stay undefined since it is not used. +@end table +@end deftp + +@comment search.h +@comment SVID +@deftypefun {ENTRY *} hsearch (ENTRY @var{item}, ACTION @var{action}) +@safety{@prelim{}@mtunsafe{@mtasurace{:hsearch}}@asunsafe{}@acunsafe{@acucorrupt{/action==ENTER}}} +@c hsearch @mtasurace:hsearch @acucorrupt/action==ENTER +@c hsearch_r dup @mtsrace:htab @acucorrupt/action==ENTER +To search in a hashing table created using @code{hcreate} the +@code{hsearch} function must be used. This function can perform a simple +search for an element (if @var{action} has the value @code{FIND}) or it can +alternatively insert the key element into the hashing table. Entries +are never replaced. + +The key is denoted by a pointer to an object of type @code{ENTRY}. For +locating the corresponding position in the hashing table only the +@code{key} element of the structure is used. + +If an entry with a matching key is found the @var{action} parameter is +irrelevant. The found entry is returned. If no matching entry is found +and the @var{action} parameter has the value @code{FIND} the function +returns a @code{NULL} pointer. If no entry is found and the +@var{action} parameter has the value @code{ENTER} a new entry is added +to the hashing table which is initialized with the parameter @var{item}. +A pointer to the newly added entry is returned. +@end deftypefun + +As mentioned before, the hashing table used by the functions described so +far is global and there can be at any time at most one hashing table in +the program. A solution is to use the following functions which are a +GNU extension. 
All have in common that they operate on a hashing table +which is described by the content of an object of the type @code{struct +hsearch_data}. This type should be treated as opaque, none of its +members should be changed directly. + +@comment search.h +@comment GNU +@deftypefun int hcreate_r (size_t @var{nel}, struct hsearch_data *@var{htab}) +@safety{@prelim{}@mtsafe{@mtsrace{:htab}}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c Unlike the lsearch array, the htab is (at least in part) opaque, so +@c let's make it absolutely clear that ensuring exclusive access is a +@c caller responsibility. + +@c Cancellation is unlikely to leave the htab in a corrupt state: the +@c last field to be initialized is the one that tells whether the entire +@c data structure was initialized, and there's a function call (calloc) +@c in between that will often ensure all other fields are written before +@c the table. However, should this call be inlined (say with LTO), this +@c assumption may not hold. The calloc call doesn't cross our library +@c interface barrier, so let's consider this could happen and mark this +@c with @acucorrupt. It's no safety loss, since we already have +@c @ascuheap anyway... + +@c hcreate_r @mtsrace:htab @ascuheap @acucorrupt @acsmem +@c isprime ok +@c calloc dup @ascuheap @acsmem +The @code{hcreate_r} function initializes the object pointed to by +@var{htab} to contain a hashing table with at least @var{nel} elements. +So this function is equivalent to the @code{hcreate} function except +that the initialized data structure is controlled by the user. + +This allows having more than one hashing table at one time. The memory +necessary for the @code{struct hsearch_data} object can be allocated +dynamically. It must be initialized with zero before calling this +function. + +The return value is non-zero if the operation was successful. If the +return value is zero, something went wrong, which probably means the +program ran out of memory. 
+@end deftypefun + +@comment search.h +@comment GNU +@deftypefun void hdestroy_r (struct hsearch_data *@var{htab}) +@safety{@prelim{}@mtsafe{@mtsrace{:htab}}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c The table is released while the table pointer still points to it. +@c Async cancellation is thus unsafe, but it already was because we call +@c free(). Using the table in a handler while it's being released would +@c also be dangerous, but calling free() already makes it unsafe, and +@c the requirement on the caller to ensure exclusive access already +@c guarantees this doesn't happen, so we don't get @asucorrupt. + +@c hdestroy_r @mtsrace:htab @ascuheap @acucorrupt @acsmem +@c free dup @ascuheap @acsmem +The @code{hdestroy_r} function frees all resources allocated by the +@code{hcreate_r} function for this very same object @var{htab}. As for +@code{hdestroy} it is the program's responsibility to free the strings +for the elements of the table. +@end deftypefun + +@comment search.h +@comment GNU +@deftypefun int hsearch_r (ENTRY @var{item}, ACTION @var{action}, ENTRY **@var{retval}, struct hsearch_data *@var{htab}) +@safety{@prelim{}@mtsafe{@mtsrace{:htab}}@assafe{}@acunsafe{@acucorrupt{/action==ENTER}}} +@c Callers have to ensure mutual exclusion; insertion, if cancelled, +@c leaves the table in a corrupt state. + +@c hsearch_r @mtsrace:htab @acucorrupt/action==ENTER +@c strlen dup ok +@c strcmp dup ok +The @code{hsearch_r} function is equivalent to @code{hsearch}. The +meaning of the first two arguments is identical. But instead of +operating on a single global hashing table the function works on the +table described by the object pointed to by @var{htab} (which is +initialized by a call to @code{hcreate_r}). + +Another difference from @code{hsearch} is that the pointer to the found +entry in the table is not the return value of the function. It is +returned by storing it in a pointer variable pointed to by the +@var{retval} parameter.
The return value of the function is an integer +value indicating success if it is non-zero and failure if it is zero. +In the latter case the global variable @code{errno} signals the reason for +the failure. + +@table @code +@item ENOMEM +The table is filled and @code{hsearch_r} was called with a so far +unknown key and @var{action} set to @code{ENTER}. +@item ESRCH +The @var{action} parameter is @code{FIND} and no corresponding element +is found in the table. +@end table +@end deftypefun + + +@node Tree Search Function +@section The @code{tsearch} function. + +Another common form to organize data for efficient search is to use +trees. The @code{tsearch} function family provides a nice interface to +functions to organize possibly large amounts of data by providing a mean +access time proportional to the logarithm of the number of elements. +@Theglibc{} implementation even guarantees that this bound is +never exceeded even for input data which cause problems for simple +binary tree implementations. + +The functions described in this section are all described in the @w{System +V} and X/Open specifications and are therefore quite portable. + +In contrast to the @code{hsearch} functions the @code{tsearch} functions +can be used with arbitrary data and not only zero-terminated strings. + +The @code{tsearch} functions have the advantage that no function to +initialize data structures is necessary. A simple pointer of type +@code{void *} initialized to @code{NULL} is a valid tree and can be +extended or searched. The prototypes for these functions can be found +in the header file @file{search.h}.
+ +@comment search.h +@comment SVID +@deftypefun {void *} tsearch (const void *@var{key}, void **@var{rootp}, comparison_fn_t @var{compar}) +@safety{@prelim{}@mtsafe{@mtsrace{:rootp}}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c The tree is not modified in a thread-safe manner, and rotations may +@c leave the tree in an inconsistent state that could be observed in an +@c asynchronous signal handler (except for the caller-synchronization +@c requirement) or after asynchronous cancellation of the thread +@c performing the rotation or the insertion. +The @code{tsearch} function searches in the tree pointed to by +@code{*@var{rootp}} for an element matching @var{key}. The function +pointed to by @var{compar} is used to determine whether two elements +match. @xref{Comparison Functions}, for a specification of the functions +which can be used for the @var{compar} parameter. + +If the tree does not contain a matching entry the @var{key} value will +be added to the tree. @code{tsearch} does not make a copy of the object +pointed to by @var{key} (how could it since the size is unknown). +Instead it adds a reference to this object which means the object must +be available as long as the tree data structure is used. + +The tree is represented by a pointer to a pointer since it is sometimes +necessary to change the root node of the tree. So it must not be +assumed that the variable pointed to by @var{rootp} has the same value +after the call. This also shows that it is not safe to call the +@code{tsearch} function more than once at the same time using the same +tree. It is no problem to run it more than once at a time on different +trees. + +The return value is a pointer to the matching element in the tree. If a +new element was created the pointer points to the new data (which is in +fact @var{key}). If an entry had to be created and the program ran out +of space @code{NULL} is returned. 
+@end deftypefun + +@comment search.h +@comment SVID +@deftypefun {void *} tfind (const void *@var{key}, void *const *@var{rootp}, comparison_fn_t @var{compar}) +@safety{@prelim{}@mtsafe{@mtsrace{:rootp}}@assafe{}@acsafe{}} +The @code{tfind} function is similar to the @code{tsearch} function. It +locates an element matching the one pointed to by @var{key} and returns +a pointer to this element. But if no matching element is available no +new element is entered (note that the @var{rootp} parameter points to a +constant pointer). Instead the function returns @code{NULL}. +@end deftypefun + +Another advantage of the @code{tsearch} functions in contrast to the +@code{hsearch} functions is that there is an easy way to remove +elements. + +@comment search.h +@comment SVID +@deftypefun {void *} tdelete (const void *@var{key}, void **@var{rootp}, comparison_fn_t @var{compar}) +@safety{@prelim{}@mtsafe{@mtsrace{:rootp}}@asunsafe{@ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +To remove a specific element matching @var{key} from the tree +@code{tdelete} can be used. It locates the matching element using the +same method as @code{tfind}. The corresponding element is then removed +and a pointer to the parent of the deleted node is returned by the +function. If there is no matching entry in the tree nothing can be +deleted and the function returns @code{NULL}. If the root of the tree +is deleted @code{tdelete} returns some unspecified value not equal to +@code{NULL}. +@end deftypefun + +@comment search.h +@comment GNU +@deftypefun void tdestroy (void *@var{vroot}, __free_fn_t @var{freefct}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +If the complete search tree has to be removed one can use +@code{tdestroy}. It frees all resources allocated by the @code{tsearch} +functions to generate the tree pointed to by @var{vroot}. + +For the data in each tree node the function @var{freefct} is called. 
+The pointer to the data is passed as the argument to the function. If +no such work is necessary @var{freefct} must point to a function doing +nothing. It is called in any case. + +This function is a GNU extension and not covered by the @w{System V} or +X/Open specifications. +@end deftypefun + +In addition to the functions to create and destroy the tree data +structure, there is another function which allows you to apply a +function to all elements of the tree. The function must have this type: + +@smallexample +void __action_fn_t (const void *nodep, VISIT value, int level); +@end smallexample + +The @var{nodep} is the data value of the current node (once given as the +@var{key} argument to @code{tsearch}). @var{level} is a numeric value +which corresponds to the depth of the current node in the tree. The +root node has the depth @math{0} and its children have a depth of +@math{1} and so on. The @code{VISIT} type is an enumeration type. + +@deftp {Data Type} VISIT +The @code{VISIT} value indicates the status of the current node in the +tree and how the function is called. The status of a node is either +`leaf' or `internal node'. For each leaf node the function is called +exactly once, for each internal node it is called three times: before +the first child is processed, after the first child is processed and +after both children are processed. This makes it possible to handle all +three methods of tree traversal (or even a combination of them). + +@vtable @code +@item preorder +The current node is an internal node and the function is called before +the first child was processed. +@item postorder +The current node is an internal node and the function is called after +the first child was processed. +@item endorder +The current node is an internal node and the function is called after +the second child was processed. +@item leaf +The current node is a leaf. 
+@end vtable +@end deftp + +@comment search.h +@comment SVID +@deftypefun void twalk (const void *@var{root}, __action_fn_t @var{action}) +@safety{@prelim{}@mtsafe{@mtsrace{:root}}@assafe{}@acsafe{}} +For each node in the tree with a node pointed to by @var{root}, the +@code{twalk} function calls the function provided by the parameter +@var{action}. For leaf nodes the function is called exactly once with +@var{value} set to @code{leaf}. For internal nodes the function is +called three times, setting the @var{value} parameter of @var{action} to +the appropriate value. The @var{level} argument for the @var{action} +function is computed while descending the tree by increasing the value +by one for each descent to a child, starting with the value @math{0} for +the root node. + +Since the functions used for the @var{action} parameter to @code{twalk} +must not modify the tree data, it is safe to run @code{twalk} in more +than one thread at the same time, working on the same tree. It is also +safe to call @code{tfind} in parallel. Functions which modify the tree +must not be used, otherwise the behavior is undefined. +@end deftypefun diff --git a/REORG.TODO/manual/setjmp.texi b/REORG.TODO/manual/setjmp.texi new file mode 100644 index 0000000000..94d16becdc --- /dev/null +++ b/REORG.TODO/manual/setjmp.texi @@ -0,0 +1,496 @@ +@node Non-Local Exits, Signal Handling, Resource Usage And Limitation, Top +@c %MENU% Jumping out of nested function calls +@chapter Non-Local Exits +@cindex non-local exits +@cindex long jumps + +Sometimes when your program detects an unusual situation inside a deeply +nested set of function calls, you would like to be able to immediately +return to an outer level of control. This section describes how you can +do such @dfn{non-local exits} using the @code{setjmp} and @code{longjmp} +functions. + +@menu +* Intro: Non-Local Intro. When and how to use these facilities. +* Details: Non-Local Details. Functions for non-local exits.
+* Non-Local Exits and Signals:: Portability issues. +* System V contexts:: Complete context control a la System V. +@end menu + +@node Non-Local Intro, Non-Local Details, , Non-Local Exits +@section Introduction to Non-Local Exits + +As an example of a situation where a non-local exit can be useful, +suppose you have an interactive program that has a ``main loop'' that +prompts for and executes commands. Suppose the ``read'' command reads +input from a file, doing some lexical analysis and parsing of the input +while processing it. If a low-level input error is detected, it would +be useful to be able to return immediately to the ``main loop'' instead +of having to make each of the lexical analysis, parsing, and processing +phases all have to explicitly deal with error situations initially +detected by nested calls. + +(On the other hand, if each of these phases has to do a substantial +amount of cleanup when it exits---such as closing files, deallocating +buffers or other data structures, and the like---then it can be more +appropriate to do a normal return and have each phase do its own +cleanup, because a non-local exit would bypass the intervening phases and +their associated cleanup code entirely. Alternatively, you could use a +non-local exit but do the cleanup explicitly either before or after +returning to the ``main loop''.) + +In some ways, a non-local exit is similar to using the @samp{return} +statement to return from a function. But while @samp{return} abandons +only a single function call, transferring control back to the point at +which it was called, a non-local exit can potentially abandon many +levels of nested function calls. + +You identify return points for non-local exits by calling the function +@code{setjmp}. This function saves information about the execution +environment in which the call to @code{setjmp} appears in an object of +type @code{jmp_buf}. 
Execution of the program continues normally after +the call to @code{setjmp}, but if an exit is later made to this return +point by calling @code{longjmp} with the corresponding @w{@code{jmp_buf}} +object, control is transferred back to the point where @code{setjmp} was +called. The return value from @code{setjmp} is used to distinguish +between an ordinary return and a return made by a call to +@code{longjmp}, so calls to @code{setjmp} usually appear in an @samp{if} +statement. + +Here is how the example program described above might be set up: + +@smallexample +@include setjmp.c.texi +@end smallexample + +The function @code{abort_to_main_loop} causes an immediate transfer of +control back to the main loop of the program, no matter where it is +called from. + +The flow of control inside the @code{main} function may appear a little +mysterious at first, but it is actually a common idiom with +@code{setjmp}. A normal call to @code{setjmp} returns zero, so the +``else'' clause of the conditional is executed. If +@code{abort_to_main_loop} is called somewhere within the execution of +@code{do_command}, then it actually appears as if the @emph{same} call +to @code{setjmp} in @code{main} were returning a second time with a value +of @code{-1}. + +@need 250 +So, the general pattern for using @code{setjmp} looks something like: + +@smallexample +if (setjmp (@var{buffer})) + /* @r{Code to clean up after premature return.} */ + @dots{} +else + /* @r{Code to be executed normally after setting up the return point.} */ + @dots{} +@end smallexample + +@node Non-Local Details, Non-Local Exits and Signals, Non-Local Intro, Non-Local Exits +@section Details of Non-Local Exits + +Here are the details on the functions and data structures used for +performing non-local exits. These facilities are declared in +@file{setjmp.h}. 
+@pindex setjmp.h + +@comment setjmp.h +@comment ISO +@deftp {Data Type} jmp_buf +Objects of type @code{jmp_buf} hold the state information to +be restored by a non-local exit. The contents of a @code{jmp_buf} +identify a specific place to return to. +@end deftp + +@comment setjmp.h +@comment ISO +@deftypefn Macro int setjmp (jmp_buf @var{state}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c _setjmp ok +@c __sigsetjmp(!savemask) ok +@c __sigjmp_save(!savemask) ok, does not call sigprocmask +When called normally, @code{setjmp} stores information about the +execution state of the program in @var{state} and returns zero. If +@code{longjmp} is later used to perform a non-local exit to this +@var{state}, @code{setjmp} returns a nonzero value. +@end deftypefn + +@comment setjmp.h +@comment ISO +@deftypefun void longjmp (jmp_buf @var{state}, int @var{value}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuplugin{} @asucorrupt{} @asulock{/hurd}}@acunsafe{@acucorrupt{} @aculock{/hurd}}} +@c __libc_siglongjmp @ascuplugin @asucorrupt @asulock/hurd @acucorrupt @aculock/hurd +@c _longjmp_unwind @ascuplugin @asucorrupt @acucorrupt +@c __pthread_cleanup_upto @ascuplugin @asucorrupt @acucorrupt +@c plugins may be unsafe themselves, but even if they weren't, this +@c function isn't robust WRT async signals and cancellation: +@c cleanups aren't taken off the stack right away, only after all +@c cleanups have been run. This means that async-cancelling +@c longjmp, or interrupting longjmp with an async signal handler +@c that calls longjmp may run the same cleanups multiple times. +@c _JMPBUF_UNWINDS_ADJ ok +@c *cleanup_buf->__routine @ascuplugin +@c sigprocmask(SIG_SETMASK) dup @asulock/hurd @aculock/hurd +@c __longjmp ok +This function restores current execution to the state saved in +@var{state}, and continues execution from the call to @code{setjmp} that +established that return point. 
Returning from @code{setjmp} by means of +@code{longjmp} returns the @var{value} argument that was passed to +@code{longjmp}, rather than @code{0}. (But if @var{value} is given as +@code{0}, @code{setjmp} returns @code{1}).@refill +@end deftypefun + +There are a lot of obscure but important restrictions on the use of +@code{setjmp} and @code{longjmp}. Most of these restrictions are +present because non-local exits require a fair amount of magic on the +part of the C compiler and can interact with other parts of the language +in strange ways. + +The @code{setjmp} function is actually a macro without an actual +function definition, so you shouldn't try to @samp{#undef} it or take +its address. In addition, calls to @code{setjmp} are safe in only the +following contexts: + +@itemize @bullet +@item +As the test expression of a selection or iteration +statement (such as @samp{if}, @samp{switch}, or @samp{while}). + +@item +As one operand of an equality or comparison operator that appears as the +test expression of a selection or iteration statement. The other +operand must be an integer constant expression. + +@item +As the operand of a unary @samp{!} operator, that appears as the +test expression of a selection or iteration statement. + +@item +By itself as an expression statement. +@end itemize + +Return points are valid only during the dynamic extent of the function +that called @code{setjmp} to establish them. If you @code{longjmp} to +a return point that was established in a function that has already +returned, unpredictable and disastrous things are likely to happen. + +You should use a nonzero @var{value} argument to @code{longjmp}. While +@code{longjmp} refuses to pass back a zero argument as the return value +from @code{setjmp}, this is intended as a safety net against accidental +misuse and is not really good programming style. 
+ +When you perform a non-local exit, accessible objects generally retain +whatever values they had at the time @code{longjmp} was called. The +exception is that the values of automatic variables local to the +function containing the @code{setjmp} call that have been changed since +the call to @code{setjmp} are indeterminate, unless you have declared +them @code{volatile}. + +@node Non-Local Exits and Signals, System V contexts, Non-Local Details, Non-Local Exits +@section Non-Local Exits and Signals + +In BSD Unix systems, @code{setjmp} and @code{longjmp} also save and +restore the set of blocked signals; see @ref{Blocking Signals}. However, +the POSIX.1 standard requires @code{setjmp} and @code{longjmp} not to +change the set of blocked signals, and provides an additional pair of +functions (@code{sigsetjmp} and @code{siglongjmp}) to get the BSD +behavior. + +The behavior of @code{setjmp} and @code{longjmp} in @theglibc{} is +controlled by feature test macros; see @ref{Feature Test Macros}. The +default in @theglibc{} is the POSIX.1 behavior rather than the BSD +behavior. + +The facilities in this section are declared in the header file +@file{setjmp.h}. +@pindex setjmp.h + +@comment setjmp.h +@comment POSIX.1 +@deftp {Data Type} sigjmp_buf +This is similar to @code{jmp_buf}, except that it can also store state +information about the set of blocked signals. +@end deftp + +@comment setjmp.h +@comment POSIX.1 +@deftypefun int sigsetjmp (sigjmp_buf @var{state}, int @var{savesigs}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c sigsetjmp @asulock/hurd @aculock/hurd +@c __sigsetjmp(savemask) @asulock/hurd @aculock/hurd +@c __sigjmp_save(savemask) @asulock/hurd @aculock/hurd +@c sigprocmask(SIG_BLOCK probe) dup @asulock/hurd @aculock/hurd +This is similar to @code{setjmp}. 
If @var{savesigs} is nonzero, the set +of blocked signals is saved in @var{state} and will be restored if a +@code{siglongjmp} is later performed with this @var{state}. +@end deftypefun + +@comment setjmp.h +@comment POSIX.1 +@deftypefun void siglongjmp (sigjmp_buf @var{state}, int @var{value}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuplugin{} @asucorrupt{} @asulock{/hurd}}@acunsafe{@acucorrupt{} @aculock{/hurd}}} +@c Alias to longjmp. +This is similar to @code{longjmp} except for the type of its @var{state} +argument. If the @code{sigsetjmp} call that set this @var{state} used a +nonzero @var{savesigs} flag, @code{siglongjmp} also restores the set of +blocked signals. +@end deftypefun + +@node System V contexts,, Non-Local Exits and Signals, Non-Local Exits +@section Complete Context Control + +The Unix standard provides one more set of functions to control the +execution path and these functions are more powerful than those +discussed in this chapter so far. These functions were part of the +original @w{System V} API and by this route were added to the Unix +API. Apart from branded Unix implementations, these interfaces are not +widely available. Not all platforms and/or architectures @theglibc{} +is available on provide this interface. Use @file{configure} to +detect the availability. + +Similar to the @code{jmp_buf} and @code{sigjmp_buf} types used for the +variables to contain the state of the @code{longjmp} functions the +interfaces of interest here have an appropriate type as well. Objects +of this type are normally much larger since more information is +contained. The type is also used in a few more places as we will see. +The types and functions described in this section are all defined and +declared respectively in the @file{ucontext.h} header file.
+ +@comment ucontext.h +@comment SVID +@deftp {Data Type} ucontext_t + +The @code{ucontext_t} type is defined as a structure with at least the +following elements: + +@table @code +@item ucontext_t *uc_link +This is a pointer to the next context structure which is used if the +context described in the current structure returns. + +@item sigset_t uc_sigmask +Set of signals which are blocked when this context is used. + +@item stack_t uc_stack +Stack used for this context. The value need not be (and normally is +not) the stack pointer. @xref{Signal Stack}. + +@item mcontext_t uc_mcontext +This element contains the actual state of the process. The +@code{mcontext_t} type is also defined in this header but the definition +should be treated as opaque. Any use of knowledge of the type makes +applications less portable. + +@end table +@end deftp + +Objects of this type have to be created by the user. The initialization +and modification happens through one of the following functions: + +@comment ucontext.h +@comment SVID +@deftypefun int getcontext (ucontext_t *@var{ucp}) +@safety{@prelim{}@mtsafe{@mtsrace{:ucp}}@assafe{}@acsafe{}} +@c Linux-only implementations in assembly, including sigprocmask +@c syscall. A few cases call the sigprocmask function, but that's safe +@c too. The ppc case is implemented in terms of a swapcontext syscall. +The @code{getcontext} function initializes the variable pointed to by +@var{ucp} with the context of the calling thread. The context contains +the content of the registers, the signal mask, and the current stack. +Executing the contents would start at the point where the +@code{getcontext} call just returned. + +The function returns @code{0} if successful. Otherwise it returns +@code{-1} and sets @var{errno} accordingly. 
+@end deftypefun + +The @code{getcontext} function is similar to @code{setjmp} but it does +not provide an indication of whether @code{getcontext} is returning for +the first time or whether an initialized context has just been restored. +If this is necessary the user has to determine this herself. This must +be done carefully since the context contains registers which might contain +register variables. This is a good situation to define variables with +@code{volatile}. + +Once the context variable is initialized it can be used as is or it can +be modified using the @code{makecontext} function. The latter is normally +done when implementing co-routines or similar constructs. + +@comment ucontext.h +@comment SVID +@deftypefun void makecontext (ucontext_t *@var{ucp}, void (*@var{func}) (void), int @var{argc}, @dots{}) +@safety{@prelim{}@mtsafe{@mtsrace{:ucp}}@assafe{}@acsafe{}} +@c Linux-only implementations mostly in assembly, nothing unsafe. + +The @var{ucp} parameter passed to @code{makecontext} shall be +initialized by a call to @code{getcontext}. The context will be +modified in a way such that if the context is resumed it will start by +calling the function @code{func} which gets @var{argc} integer arguments +passed. The integer arguments which are to be passed should follow the +@var{argc} parameter in the call to @code{makecontext}. + +Before the call to this function the @code{uc_stack} and @code{uc_link} +element of the @var{ucp} structure should be initialized. The +@code{uc_stack} element describes the stack which is used for this +context. No two contexts which are used at the same time should use the +same memory region for a stack. + +The @code{uc_link} element of the object pointed to by @var{ucp} should +be a pointer to the context to be executed when the function @var{func} +returns or it should be a null pointer. See @code{setcontext} for more +information about the exact use. 
+@end deftypefun + +While allocating the memory for the stack one has to be careful. Most +modern processors keep track of whether a certain memory region is +allowed to contain code which is executed or not. Data segments and +heap memory are normally not tagged to allow this. The result is that +programs which execute code placed on a stack allocated from such memory +would fail. Examples for such code include the calling +sequences the GNU C compiler generates for calls to nested functions. +Safe ways to allocate stacks correctly include using memory on the +original thread's stack or explicitly allocating memory tagged for +execution using @code{mmap} (@pxref{Memory-mapped I/O}). + +@strong{Compatibility note}: The current Unix standard is very imprecise +about the way the stack is allocated. All implementations seem to agree +that the @code{uc_stack} element must be used but the values stored in +the elements of the @code{stack_t} value are unclear. @Theglibc{} +and most other Unix implementations require the @code{ss_sp} value of +the @code{uc_stack} element to point to the base of the memory region +allocated for the stack and the size of the memory region is stored in +@code{ss_size}. There are implementations out there which require +@code{ss_sp} to be set to the value the stack pointer will have (which +can, depending on the direction the stack grows, be different). This +difference makes the @code{makecontext} function hard to use and it +requires detection of the platform at compile time. + +@comment ucontext.h +@comment SVID +@deftypefun int setcontext (const ucontext_t *@var{ucp}) +@safety{@prelim{}@mtsafe{@mtsrace{:ucp}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +@c Linux-only implementations mostly in assembly. Some ports use +@c sigreturn or swapcontext syscalls; others restore the signal mask +@c first and then proceed restore other registers in userland, which +@c leaves a window for cancellation or async signals with misaligned or +@c otherwise corrupt stack. ???
Switching to a different stack, or even +@c to an earlier state on the same stack, may conflict with pthread +@c cleanups. This is not quite MT-Unsafe, it's a different kind of +@c safety issue. + +The @code{setcontext} function restores the context described by +@var{ucp}. The context is not modified and can be reused as often as +wanted. + +If the context was created by @code{getcontext} execution resumes with +the registers filled with the same values and the same stack as if the +@code{getcontext} call just returned. + +If the context was modified with a call to @code{makecontext} execution +continues with the function passed to @code{makecontext} which gets the +specified parameters passed. If this function returns execution is +resumed in the context which was referenced by the @code{uc_link} +element of the context structure passed to @code{makecontext} at the +time of the call. If @code{uc_link} was a null pointer the application +terminates normally with an exit status value of @code{EXIT_SUCCESS} +(@pxref{Program Termination}). + +If the context was created by a call to a signal handler or from any +other source then the behaviour of @code{setcontext} is unspecified. + +Since the context contains information about the stack no two threads +should use the same context at the same time. The result in most cases +would be disastrous. + +The @code{setcontext} function does not return unless an error occurred +in which case it returns @code{-1}. +@end deftypefun + +The @code{setcontext} function simply replaces the current context with +the one described by the @var{ucp} parameter. This is often useful but +there are situations where the current context has to be preserved. 
+ +@comment ucontext.h +@comment SVID +@deftypefun int swapcontext (ucontext_t *restrict @var{oucp}, const ucontext_t *restrict @var{ucp}) +@safety{@prelim{}@mtsafe{@mtsrace{:oucp} @mtsrace{:ucp}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +@c Linux-only implementations mostly in assembly. Some ports call or +@c inline getcontext and/or setcontext, adjusting the saved context in +@c between, so we inherit the potential issues of both. + +The @code{swapcontext} function is similar to @code{setcontext} but +instead of just replacing the current context the latter is first saved +in the object pointed to by @var{oucp} as if this was a call to +@code{getcontext}. The saved context would resume after the call to +@code{swapcontext}. + +Once the current context is saved the context described in @var{ucp} is +installed and execution continues as described in this context. + +If @code{swapcontext} succeeds the function does not return unless the +context @var{oucp} is used without prior modification by +@code{makecontext}. The return value in this case is @code{0}. If the +function fails it returns @code{-1} and sets @var{errno} accordingly. +@end deftypefun + +@heading Example for SVID Context Handling + +The easiest way to use the context handling functions is as a +replacement for @code{setjmp} and @code{longjmp}. The context contains +on most platforms more information which may lead to fewer surprises +but this also means using these functions is more expensive (besides +being less portable). + +@smallexample +int +random_search (int n, int (*fp) (int, ucontext_t *)) +@{ + volatile int cnt = 0; + ucontext_t uc; + + /* @r{Save current context.} */ + if (getcontext (&uc) < 0) + return -1; + + /* @r{If we have not tried @var{n} times try again.} */ + if (cnt++ < n) + /* @r{Call the function with a new random number} + @r{and the context}.
*/ + if (fp (rand (), &uc) != 0) + /* @r{We found what we were looking for.} */ + return 1; + + /* @r{Not found.} */ + return 0; +@} +@end smallexample + +Using contexts in such a way enables emulating exception handling. The +search functions passed in the @var{fp} parameter could be very large, +nested, and complex which would make it complicated (or at least would +require a lot of code) to leave the function with an error value which +has to be passed down to the caller. By using the context it is +possible to leave the search function in one step and allow restarting +the search which also has the nice side effect that it can be +significantly faster. + +Something which is harder to implement with @code{setjmp} and +@code{longjmp} is to switch temporarily to a different execution path +and then resume where execution was stopped. + +@smallexample +@include swapcontext.c.texi +@end smallexample + +This is an example of how the context functions can be used to implement +co-routines or cooperative multi-threading. All that has to be done is +to call every once in a while @code{swapcontext} to continue running a +different context. It is not recommended to do the context switching from +the signal handler directly since leaving the signal handler via +@code{setcontext} if the signal was delivered during code that was not +asynchronous signal safe could lead to problems. Setting a variable in +the signal handler and checking it in the body of the functions which +are executed is a safer approach. Since @code{swapcontext} is saving the +current context it is possible to have multiple different scheduling points +in the code. Execution will always resume where it was left.
diff --git a/REORG.TODO/manual/signal.texi b/REORG.TODO/manual/signal.texi new file mode 100644 index 0000000000..d6a1bfe94a --- /dev/null +++ b/REORG.TODO/manual/signal.texi @@ -0,0 +1,3383 @@ +@node Signal Handling, Program Basics, Non-Local Exits, Top +@c %MENU% How to send, block, and handle signals +@chapter Signal Handling + +@cindex signal +A @dfn{signal} is a software interrupt delivered to a process. The +operating system uses signals to report exceptional situations to an +executing program. Some signals report errors such as references to +invalid memory addresses; others report asynchronous events, such as +disconnection of a phone line. + +@Theglibc{} defines a variety of signal types, each for a +particular kind of event. Some kinds of events make it inadvisable or +impossible for the program to proceed as usual, and the corresponding +signals normally abort the program. Other kinds of signals that report +harmless events are ignored by default. + +If you anticipate an event that causes signals, you can define a handler +function and tell the operating system to run it when that particular +type of signal arrives. + +Finally, one process can send a signal to another process; this allows a +parent process to abort a child, or two related processes to communicate +and synchronize. + +@menu +* Concepts of Signals:: Introduction to the signal facilities. +* Standard Signals:: Particular kinds of signals with + standard names and meanings. +* Signal Actions:: Specifying what happens when a + particular signal is delivered. +* Defining Handlers:: How to write a signal handler function. +* Interrupted Primitives:: Signal handlers affect use of @code{open}, + @code{read}, @code{write} and other functions. +* Generating Signals:: How to send a signal to a process. +* Blocking Signals:: Making the system hold signals temporarily. +* Waiting for a Signal:: Suspending your program until a signal + arrives. +* Signal Stack:: Using a Separate Signal Stack. 
+* BSD Signal Handling:: Additional functions for backward + compatibility with BSD. +@end menu + +@node Concepts of Signals +@section Basic Concepts of Signals + +This section explains basic concepts of how signals are generated, what +happens after a signal is delivered, and how programs can handle +signals. + +@menu +* Kinds of Signals:: Some examples of what can cause a signal. +* Signal Generation:: Concepts of why and how signals occur. +* Delivery of Signal:: Concepts of what a signal does to the + process. +@end menu + +@node Kinds of Signals +@subsection Some Kinds of Signals + +A signal reports the occurrence of an exceptional event. These are some +of the events that can cause (or @dfn{generate}, or @dfn{raise}) a +signal: + +@itemize @bullet +@item +A program error such as dividing by zero or issuing an address outside +the valid range. + +@item +A user request to interrupt or terminate the program. Most environments +are set up to let a user suspend the program by typing @kbd{C-z}, or +terminate it with @kbd{C-c}. Whatever key sequence is used, the +operating system sends the proper signal to interrupt the process. + +@item +The termination of a child process. + +@item +Expiration of a timer or alarm. + +@item +A call to @code{kill} or @code{raise} by the same process. + +@item +A call to @code{kill} from another process. Signals are a limited but +useful form of interprocess communication. + +@item +An attempt to perform an I/O operation that cannot be done. Examples +are reading from a pipe that has no writer (@pxref{Pipes and FIFOs}), +and reading or writing to a terminal in certain situations (@pxref{Job +Control}). +@end itemize + +Each of these kinds of events (excepting explicit calls to @code{kill} +and @code{raise}) generates its own particular kind of signal. The +various kinds of signals are listed and described in detail in +@ref{Standard Signals}. 
+ +@node Signal Generation +@subsection Concepts of Signal Generation +@cindex generation of signals + +In general, the events that generate signals fall into three major +categories: errors, external events, and explicit requests. + +An error means that a program has done something invalid and cannot +continue execution. But not all kinds of errors generate signals---in +fact, most do not. For example, opening a nonexistent file is an error, +but it does not raise a signal; instead, @code{open} returns @code{-1}. +In general, errors that are necessarily associated with certain library +functions are reported by returning a value that indicates an error. +The errors which raise signals are those which can happen anywhere in +the program, not just in library calls. These include division by zero +and invalid memory addresses. + +An external event generally has to do with I/O or other processes. +These include the arrival of input, the expiration of a timer, and the +termination of a child process. + +An explicit request means the use of a library function such as +@code{kill} whose purpose is specifically to generate a signal. + +Signals may be generated @dfn{synchronously} or @dfn{asynchronously}. A +synchronous signal pertains to a specific action in the program, and is +delivered (unless blocked) during that action. Most errors generate +signals synchronously, and so do explicit requests by a process to +generate a signal for that same process. On some machines, certain +kinds of hardware errors (usually floating-point exceptions) are not +reported completely synchronously, but may arrive a few instructions +later. + +Asynchronous signals are generated by events outside the control of the +process that receives them. These signals arrive at unpredictable times +during execution. External events generate signals asynchronously, and +so do explicit requests that apply to some other process. 
+ +A given type of signal is either typically synchronous or typically +asynchronous. For example, signals for errors are typically synchronous +because errors generate signals synchronously. But any type of signal +can be generated synchronously or asynchronously with an explicit +request. + +@node Delivery of Signal +@subsection How Signals Are Delivered +@cindex delivery of signals +@cindex pending signals +@cindex blocked signals + +When a signal is generated, it becomes @dfn{pending}. Normally it +remains pending for just a short period of time and then is +@dfn{delivered} to the process that was signaled. However, if that kind +of signal is currently @dfn{blocked}, it may remain pending +indefinitely---until signals of that kind are @dfn{unblocked}. Once +unblocked, it will be delivered immediately. @xref{Blocking Signals}. + +@cindex specified action (for a signal) +@cindex default action (for a signal) +@cindex signal action +@cindex catching signals +When the signal is delivered, whether right away or after a long delay, +the @dfn{specified action} for that signal is taken. For certain +signals, such as @code{SIGKILL} and @code{SIGSTOP}, the action is fixed, +but for most signals, the program has a choice: ignore the signal, +specify a @dfn{handler function}, or accept the @dfn{default action} for +that kind of signal. The program specifies its choice using functions +such as @code{signal} or @code{sigaction} (@pxref{Signal Actions}). We +sometimes say that a handler @dfn{catches} the signal. While the +handler is running, that particular signal is normally blocked. + +If the specified action for a kind of signal is to ignore it, then any +such signal which is generated is discarded immediately. This happens +even if the signal is also blocked at the time. A signal discarded in +this way will never be delivered, not even if the program subsequently +specifies a different action for that kind of signal and then unblocks +it. 
+ +If a signal arrives which the program has neither handled nor ignored, +its @dfn{default action} takes place. Each kind of signal has its own +default action, documented below (@pxref{Standard Signals}). For most kinds +of signals, the default action is to terminate the process. For certain +kinds of signals that represent ``harmless'' events, the default action +is to do nothing. + +When a signal terminates a process, its parent process can determine the +cause of termination by examining the termination status code reported +by the @code{wait} or @code{waitpid} functions. (This is discussed in +more detail in @ref{Process Completion}.) The information it can get +includes the fact that termination was due to a signal and the kind of +signal involved. If a program you run from a shell is terminated by a +signal, the shell typically prints some kind of error message. + +The signals that normally represent program errors have a special +property: when one of these signals terminates the process, it also +writes a @dfn{core dump file} which records the state of the process at +the time of termination. You can examine the core dump with a debugger +to investigate what caused the error. + +If you raise a ``program error'' signal by explicit request, and this +terminates the process, it makes a core dump file just as if the signal +had been due directly to an error. + +@node Standard Signals +@section Standard Signals +@cindex signal names +@cindex names of signals + +@pindex signal.h +@cindex signal number +This section lists the names for various standard kinds of signals and +describes what kind of event they mean. Each signal name is a macro +which stands for a positive integer---the @dfn{signal number} for that +kind of signal. Your programs should never make assumptions about the +numeric code for a particular kind of signal, but rather refer to them +always by the names defined here. 
This is because the number for a +given kind of signal can vary from system to system, but the meanings of +the names are standardized and fairly uniform. + +The signal names are defined in the header file @file{signal.h}. + +@comment signal.h +@comment BSD +@deftypevr Macro int NSIG +The value of this symbolic constant is the total number of signals +defined. Since the signal numbers are allocated consecutively, +@code{NSIG} is also one greater than the largest defined signal number. +@end deftypevr + +@menu +* Program Error Signals:: Used to report serious program errors. +* Termination Signals:: Used to interrupt and/or terminate the + program. +* Alarm Signals:: Used to indicate expiration of timers. +* Asynchronous I/O Signals:: Used to indicate input is available. +* Job Control Signals:: Signals used to support job control. +* Operation Error Signals:: Used to report operational system errors. +* Miscellaneous Signals:: Miscellaneous Signals. +* Signal Messages:: Printing a message describing a signal. +@end menu + +@node Program Error Signals +@subsection Program Error Signals +@cindex program error signals + +The following signals are generated when a serious program error is +detected by the operating system or the computer itself. In general, +all of these signals are indications that your program is seriously +broken in some way, and there's usually no way to continue the +computation which encountered the error. + +Some programs handle program error signals in order to tidy up before +terminating; for example, programs that turn off echoing of terminal +input should handle program error signals in order to turn echoing back +on. The handler should end by specifying the default action for the +signal that happened and then reraising it; this will cause the program +to terminate with that signal, as if it had not had a handler. +(@xref{Termination in Handler}.) + +Termination is the sensible ultimate outcome from a program error in +most programs. 
However, programming systems such as Lisp that can load +compiled user programs might need to keep executing even if a user +program incurs an error. These programs have handlers which use +@code{longjmp} to return control to the command level. + +The default action for all of these signals is to cause the process to +terminate. If you block or ignore these signals or establish handlers +for them that return normally, your program will probably break horribly +when such signals happen, unless they are generated by @code{raise} or +@code{kill} instead of a real error. + +@vindex COREFILE +When one of these program error signals terminates a process, it also +writes a @dfn{core dump file} which records the state of the process at +the time of termination. The core dump file is named @file{core} and is +written in whichever directory is current in the process at the time. +(On @gnuhurdsystems{}, you can specify the file name for core dumps with +the environment variable @code{COREFILE}.) The purpose of core dump +files is so that you can examine them with a debugger to investigate +what caused the error. + +@comment signal.h +@comment ISO +@deftypevr Macro int SIGFPE +The @code{SIGFPE} signal reports a fatal arithmetic error. Although the +name is derived from ``floating-point exception'', this signal actually +covers all arithmetic errors, including division by zero and overflow. +If a program stores integer data in a location which is then used in a +floating-point operation, this often causes an ``invalid operation'' +exception, because the processor cannot recognize the data as a +floating-point number. +@cindex exception +@cindex floating-point exception + +Actual floating-point exceptions are a complicated subject because there +are many types of exceptions with subtly different meanings, and the +@code{SIGFPE} signal doesn't distinguish between them. 
The @cite{IEEE +Standard for Binary Floating-Point Arithmetic (ANSI/IEEE Std 754-1985 +and ANSI/IEEE Std 854-1987)} +defines various floating-point exceptions and requires conforming +computer systems to report their occurrences. However, this standard +does not specify how the exceptions are reported, or what kinds of +handling and control the operating system can offer to the programmer. +@end deftypevr + +BSD systems provide the @code{SIGFPE} handler with an extra argument +that distinguishes various causes of the exception. In order to access +this argument, you must define the handler to accept two arguments, +which means you must cast it to a one-argument function type in order to +establish the handler. @Theglibc{} does provide this extra +argument, but the value is meaningful only on operating systems that +provide the information (BSD systems and @gnusystems{}). + +@vtable @code +@comment signal.h +@comment BSD +@item FPE_INTOVF_TRAP +Integer overflow (impossible in a C program unless you enable overflow +trapping in a hardware-specific fashion). +@comment signal.h +@comment BSD +@item FPE_INTDIV_TRAP +Integer division by zero. +@comment signal.h +@comment BSD +@item FPE_SUBRNG_TRAP +Subscript-range (something that C programs never check for). +@comment signal.h +@comment BSD +@item FPE_FLTOVF_TRAP +Floating overflow trap. +@comment signal.h +@comment BSD +@item FPE_FLTDIV_TRAP +Floating/decimal division by zero. +@comment signal.h +@comment BSD +@item FPE_FLTUND_TRAP +Floating underflow trap. (Trapping on floating underflow is not +normally enabled.) +@comment signal.h +@comment BSD +@item FPE_DECOVF_TRAP +Decimal overflow trap. (Only a few machines have decimal arithmetic and +C never uses it.) +@ignore @c These seem redundant +@comment signal.h +@comment BSD +@item FPE_FLTOVF_FAULT +Floating overflow fault. +@comment signal.h +@comment BSD +@item FPE_FLTDIV_FAULT +Floating divide by zero fault. 
+@comment signal.h +@comment BSD +@item FPE_FLTUND_FAULT +Floating underflow fault. +@end ignore +@end vtable + +@comment signal.h +@comment ISO +@deftypevr Macro int SIGILL +The name of this signal is derived from ``illegal instruction''; it +usually means your program is trying to execute garbage or a privileged +instruction. Since the C compiler generates only valid instructions, +@code{SIGILL} typically indicates that the executable file is corrupted, +or that you are trying to execute data. Some common ways of getting +into the latter situation are by passing an invalid object where a +pointer to a function was expected, or by writing past the end of an +automatic array (or similar problems with pointers to automatic +variables) and corrupting other data on the stack such as the return +address of a stack frame. + +@code{SIGILL} can also be generated when the stack overflows, or when +the system has trouble running the handler for a signal. +@end deftypevr +@cindex illegal instruction + +@comment signal.h +@comment ISO +@deftypevr Macro int SIGSEGV +@cindex segmentation violation +This signal is generated when a program tries to read or write outside +the memory that is allocated for it, or to write memory that can only be +read. (Actually, the signals only occur when the program goes far +enough outside to be detected by the system's memory protection +mechanism.) The name is an abbreviation for ``segmentation violation''. + +Common ways of getting a @code{SIGSEGV} condition include dereferencing +a null or uninitialized pointer, or when you use a pointer to step +through an array, but fail to check for the end of the array. It varies +among systems whether dereferencing a null pointer generates +@code{SIGSEGV} or @code{SIGBUS}. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGBUS +This signal is generated when an invalid pointer is dereferenced. 
Like +@code{SIGSEGV}, this signal is typically the result of dereferencing an +uninitialized pointer. The difference between the two is that +@code{SIGSEGV} indicates an invalid access to valid memory, while +@code{SIGBUS} indicates an access to an invalid address. In particular, +@code{SIGBUS} signals often result from dereferencing a misaligned +pointer, such as referring to a four-byte integer at an address not +divisible by four. (Each kind of computer has its own requirements for +address alignment.) + +The name of this signal is an abbreviation for ``bus error''. +@end deftypevr +@cindex bus error + +@comment signal.h +@comment ISO +@deftypevr Macro int SIGABRT +@cindex abort signal +This signal indicates an error detected by the program itself and +reported by calling @code{abort}. @xref{Aborting a Program}. +@end deftypevr + +@comment signal.h +@comment Unix +@deftypevr Macro int SIGIOT +Generated by the PDP-11 ``iot'' instruction. On most machines, this is +just another name for @code{SIGABRT}. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGTRAP +Generated by the machine's breakpoint instruction, and possibly other +trap instructions. This signal is used by debuggers. Your program will +probably only see @code{SIGTRAP} if it is somehow executing bad +instructions. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGEMT +Emulator trap; this results from certain unimplemented instructions +which might be emulated in software, or the operating system's +failure to properly emulate them. +@end deftypevr + +@comment signal.h +@comment Unix +@deftypevr Macro int SIGSYS +Bad system call; that is to say, the instruction to trap to the +operating system was executed, but the code number for the system call +to perform was invalid. 
+@end deftypevr + +@node Termination Signals +@subsection Termination Signals +@cindex program termination signals + +These signals are all used to tell a process to terminate, in one way +or another. They have different names because they're used for slightly +different purposes, and programs might want to handle them differently. + +The reason for handling these signals is usually so your program can +tidy up as appropriate before actually terminating. For example, you +might want to save state information, delete temporary files, or restore +the previous terminal modes. Such a handler should end by specifying +the default action for the signal that happened and then reraising it; +this will cause the program to terminate with that signal, as if it had +not had a handler. (@xref{Termination in Handler}.) + +The (obvious) default action for all of these signals is to cause the +process to terminate. + +@comment signal.h +@comment ISO +@deftypevr Macro int SIGTERM +@cindex termination signal +The @code{SIGTERM} signal is a generic signal used to cause program +termination. Unlike @code{SIGKILL}, this signal can be blocked, +handled, and ignored. It is the normal way to politely ask a program to +terminate. + +The shell command @code{kill} generates @code{SIGTERM} by default. +@pindex kill +@end deftypevr + +@comment signal.h +@comment ISO +@deftypevr Macro int SIGINT +@cindex interrupt signal +The @code{SIGINT} (``program interrupt'') signal is sent when the user +types the INTR character (normally @kbd{C-c}). @xref{Special +Characters}, for information about terminal driver support for +@kbd{C-c}. +@end deftypevr + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGQUIT +@cindex quit signal +The @code{SIGQUIT} signal is similar to @code{SIGINT}, except that it's +controlled by a different key---the QUIT character, usually +@kbd{C-\}---and produces a core dump when it terminates the process, +just like a program error signal. 
You can think of this as a +program error condition ``detected'' by the user. + +@xref{Program Error Signals}, for information about core dumps. +@xref{Special Characters}, for information about terminal driver +support. + +Certain kinds of cleanups are best omitted in handling @code{SIGQUIT}. +For example, if the program creates temporary files, it should handle +the other termination requests by deleting the temporary files. But it +is better for @code{SIGQUIT} not to delete them, so that the user can +examine them in conjunction with the core dump. +@end deftypevr + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGKILL +The @code{SIGKILL} signal is used to cause immediate program termination. +It cannot be handled or ignored, and is therefore always fatal. It is +also not possible to block this signal. + +This signal is usually generated only by explicit request. Since it +cannot be handled, you should generate it only as a last resort, after +first trying a less drastic method such as @kbd{C-c} or @code{SIGTERM}. +If a process does not respond to any other termination signals, sending +it a @code{SIGKILL} signal will almost always cause it to go away. + +In fact, if @code{SIGKILL} fails to terminate a process, that by itself +constitutes an operating system bug which you should report. + +The system will generate @code{SIGKILL} for a process itself under some +unusual conditions where the program cannot possibly continue to run +(even to run a signal handler). +@end deftypevr +@cindex kill signal + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGHUP +@cindex hangup signal +The @code{SIGHUP} (``hang-up'') signal is used to report that the user's +terminal is disconnected, perhaps because a network or telephone +connection was broken. For more information about this, see @ref{Control +Modes}. 
+ +This signal is also used to report the termination of the controlling +process on a terminal to jobs associated with that session; this +termination effectively disconnects all processes in the session from +the controlling terminal. For more information, see @ref{Termination +Internals}. +@end deftypevr + +@node Alarm Signals +@subsection Alarm Signals + +These signals are used to indicate the expiration of timers. +@xref{Setting an Alarm}, for information about functions that cause +these signals to be sent. + +The default behavior for these signals is to cause program termination. +This default is rarely useful, but no other default would be useful; +most of the ways of using these signals would require handler functions +in any case. + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGALRM +This signal typically indicates expiration of a timer that measures real +or clock time. It is used by the @code{alarm} function, for example. +@end deftypevr +@cindex alarm signal + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGVTALRM +This signal typically indicates expiration of a timer that measures CPU +time used by the current process. The name is an abbreviation for +``virtual time alarm''. +@end deftypevr +@cindex virtual time alarm signal + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGPROF +This signal typically indicates expiration of a timer that measures +both CPU time used by the current process, and CPU time expended on +behalf of the process by the system. Such a timer is used to implement +code profiling facilities, hence the name of this signal. +@end deftypevr +@cindex profiling alarm signal + + +@node Asynchronous I/O Signals +@subsection Asynchronous I/O Signals + +The signals listed in this section are used in conjunction with +asynchronous I/O facilities. You have to take explicit action by +calling @code{fcntl} to enable a particular file descriptor to generate +these signals (@pxref{Interrupt Input}). 
The default action for these +signals is to ignore them. + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGIO +@cindex input available signal +@cindex output possible signal +This signal is sent when a file descriptor is ready to perform input +or output. + +On most operating systems, terminals and sockets are the only kinds of +files that can generate @code{SIGIO}; other kinds, including ordinary +files, never generate @code{SIGIO} even if you ask them to. + +On @gnusystems{} @code{SIGIO} will always be generated properly +if you successfully set asynchronous mode with @code{fcntl}. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGURG +@cindex urgent data signal +This signal is sent when ``urgent'' or out-of-band data arrives on a +socket. @xref{Out-of-Band Data}. +@end deftypevr + +@comment signal.h +@comment SVID +@deftypevr Macro int SIGPOLL +This is a System V signal name, more or less similar to @code{SIGIO}. +It is defined only for compatibility. +@end deftypevr + +@node Job Control Signals +@subsection Job Control Signals +@cindex job control signals + +These signals are used to support job control. If your system +doesn't support job control, then these macros are defined but the +signals themselves can't be raised or handled. + +You should generally leave these signals alone unless you really +understand how job control works. @xref{Job Control}. + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGCHLD +@cindex child process signal +This signal is sent to a parent process whenever one of its child +processes terminates or stops. + +The default action for this signal is to ignore it. If you establish a +handler for this signal while there are child processes that have +terminated but not reported their status via @code{wait} or +@code{waitpid} (@pxref{Process Completion}), whether your new handler +applies to those processes or not depends on the particular operating +system. 
+@end deftypevr + +@comment signal.h +@comment SVID +@deftypevr Macro int SIGCLD +This is an obsolete name for @code{SIGCHLD}. +@end deftypevr + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGCONT +@cindex continue signal +You can send a @code{SIGCONT} signal to a process to make it continue. +This signal is special---it always makes the process continue if it is +stopped, before the signal is delivered. The default behavior is to do +nothing else. You cannot block this signal. You can set a handler, but +@code{SIGCONT} always makes the process continue regardless. + +Most programs have no reason to handle @code{SIGCONT}; they simply +resume execution without realizing they were ever stopped. You can use +a handler for @code{SIGCONT} to make a program do something special when +it is stopped and continued---for example, to reprint a prompt when it +is suspended while waiting for input. +@end deftypevr + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGSTOP +The @code{SIGSTOP} signal stops the process. It cannot be handled, +ignored, or blocked. +@end deftypevr +@cindex stop signal + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGTSTP +The @code{SIGTSTP} signal is an interactive stop signal. Unlike +@code{SIGSTOP}, this signal can be handled and ignored. + +Your program should handle this signal if you have a special need to +leave files or system tables in a secure state when a process is +stopped. For example, programs that turn off echoing should handle +@code{SIGTSTP} so they can turn echoing back on before stopping. + +This signal is generated when the user types the SUSP character +(normally @kbd{C-z}). For more information about terminal driver +support, see @ref{Special Characters}. +@end deftypevr +@cindex interactive stop signal + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGTTIN +A process cannot read from the user's terminal while it is running +as a background job. 
When any process in a background job tries to +read from the terminal, all of the processes in the job are sent a +@code{SIGTTIN} signal. The default action for this signal is to +stop the process. For more information about how this interacts with +the terminal driver, see @ref{Access to the Terminal}. +@end deftypevr +@cindex terminal input signal + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGTTOU +This is similar to @code{SIGTTIN}, but is generated when a process in a +background job attempts to write to the terminal or set its modes. +Again, the default action is to stop the process. @code{SIGTTOU} is +only generated for an attempt to write to the terminal if the +@code{TOSTOP} output mode is set; @pxref{Output Modes}. +@end deftypevr +@cindex terminal output signal + +While a process is stopped, no more signals can be delivered to it until +it is continued, except @code{SIGKILL} signals and (obviously) +@code{SIGCONT} signals. The signals are marked as pending, but not +delivered until the process is continued. The @code{SIGKILL} signal +always causes termination of the process and can't be blocked, handled +or ignored. You can ignore @code{SIGCONT}, but it always causes the +process to be continued anyway if it is stopped. Sending a +@code{SIGCONT} signal to a process causes any pending stop signals for +that process to be discarded. Likewise, any pending @code{SIGCONT} +signals for a process are discarded when it receives a stop signal. + +When a process in an orphaned process group (@pxref{Orphaned Process +Groups}) receives a @code{SIGTSTP}, @code{SIGTTIN}, or @code{SIGTTOU} +signal and does not handle it, the process does not stop. Stopping the +process would probably not be very useful, since there is no shell +program that will notice it stop and allow the user to continue it. +What happens instead depends on the operating system you are using. 
+Some systems may do nothing; others may deliver another signal instead, +such as @code{SIGKILL} or @code{SIGHUP}. On @gnuhurdsystems{}, the process +dies with @code{SIGKILL}; this avoids the problem of many stopped, +orphaned processes lying around the system. + +@ignore +On @gnuhurdsystems{}, it is possible to reattach to the orphaned process +group and continue it, so stop signals do stop the process as usual on +@gnuhurdsystems{} unless you have requested POSIX compatibility ``till it +hurts.'' +@end ignore + +@node Operation Error Signals +@subsection Operation Error Signals + +These signals are used to report various errors generated by an +operation done by the program. They do not necessarily indicate a +programming error in the program, but an error that prevents an +operating system call from completing. The default action for all of +them is to cause the process to terminate. + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGPIPE +@cindex pipe signal +@cindex broken pipe signal +Broken pipe. If you use pipes or FIFOs, you have to design your +application so that one process opens the pipe for reading before +another starts writing. If the reading process never starts, or +terminates unexpectedly, writing to the pipe or FIFO raises a +@code{SIGPIPE} signal. If @code{SIGPIPE} is blocked, handled or +ignored, the offending call fails with @code{EPIPE} instead. + +Pipes and FIFO special files are discussed in more detail in @ref{Pipes +and FIFOs}. + +Another cause of @code{SIGPIPE} is when you try to output to a socket +that isn't connected. @xref{Sending Data}. +@end deftypevr + +@comment signal.h +@comment GNU +@deftypevr Macro int SIGLOST +@cindex lost resource signal +Resource lost. This signal is generated when you have an advisory lock +on an NFS file, and the NFS server reboots and forgets about your lock. + +On @gnuhurdsystems{}, @code{SIGLOST} is generated when any server program +dies unexpectedly. 
It is usually fine to ignore the signal; whatever +call was made to the server that died just returns an error. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGXCPU +CPU time limit exceeded. This signal is generated when the process +exceeds its soft resource limit on CPU time. @xref{Limits on Resources}. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGXFSZ +File size limit exceeded. This signal is generated when the process +attempts to extend a file so it exceeds the process's soft resource +limit on file size. @xref{Limits on Resources}. +@end deftypevr + +@node Miscellaneous Signals +@subsection Miscellaneous Signals + +These signals are used for various other purposes. In general, they +will not affect your program unless it explicitly uses them for something. + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SIGUSR1 +@comment signal.h +@comment POSIX.1 +@deftypevrx Macro int SIGUSR2 +@cindex user signals +The @code{SIGUSR1} and @code{SIGUSR2} signals are set aside for you to +use any way you want. They're useful for simple interprocess +communication, if you write a signal handler for them in the program +that receives the signal. + +There is an example showing the use of @code{SIGUSR1} and @code{SIGUSR2} +in @ref{Signaling Another Process}. + +The default action is to terminate the process. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGWINCH +Window size change. This is generated on some systems (including GNU) +when the terminal driver's record of the number of rows and columns on +the screen is changed. The default action is to ignore it. + +If a program does full-screen display, it should handle @code{SIGWINCH}. +When the signal arrives, it should fetch the new screen size and +reformat its display accordingly. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SIGINFO +Information request. 
On 4.4 BSD and @gnuhurdsystems{}, this signal is sent +to all the processes in the foreground process group of the controlling +terminal when the user types the STATUS character in canonical mode; +@pxref{Signal Characters}. + +If the process is the leader of the process group, the default action is +to print some status information about the system and what the process +is doing. Otherwise the default is to do nothing. +@end deftypevr + +@node Signal Messages +@subsection Signal Messages +@cindex signal messages + +We mentioned above that the shell prints a message describing the signal +that terminated a child process. The clean way to print a message +describing a signal is to use the functions @code{strsignal} and +@code{psignal}. These functions use a signal number to specify which +kind of signal to describe. The signal number may come from the +termination status of a child process (@pxref{Process Completion}) or it +may come from a signal handler in the same process. + +@comment string.h +@comment GNU +@deftypefun {char *} strsignal (int @var{signum}) +@safety{@prelim{}@mtunsafe{@mtasurace{:strsignal} @mtslocale{}}@asunsafe{@asuinit{} @ascuintl{} @asucorrupt{} @ascuheap{}}@acunsafe{@acuinit{} @acucorrupt{} @acsmem{}}} +@c strsignal @mtasurace:strsignal @mtslocale @asuinit @ascuintl @asucorrupt @ascuheap @acucorrupt @acsmem +@c uses a static buffer if tsd key creation fails +@c [once] init +@c libc_key_create ok +@c pthread_key_create dup ok +@c getbuffer @asucorrupt @ascuheap @acsmem +@c libc_getspecific ok +@c pthread_getspecific dup ok +@c malloc dup @ascuheap @acsmem +@c libc_setspecific @asucorrupt @ascuheap @acucorrupt @acsmem +@c pthread_setspecific dup @asucorrupt @ascuheap @acucorrupt @acsmem +@c snprintf dup @mtslocale @ascuheap @acsmem +@c _ @ascuintl +This function returns a pointer to a statically-allocated string +containing a message describing the signal @var{signum}. 
You +should not modify the contents of this string; and, since it can be +rewritten on subsequent calls, you should save a copy of it if you need +to reference it later. + +@pindex string.h +This function is a GNU extension, declared in the header file +@file{string.h}. +@end deftypefun + +@comment signal.h +@comment BSD +@deftypefun void psignal (int @var{signum}, const char *@var{message}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuintl{} @ascuheap{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{}}} +@c psignal @mtslocale @asucorrupt @ascuintl @ascuheap @aculock @acucorrupt @acsmem +@c _ @ascuintl +@c fxprintf @asucorrupt @aculock @acucorrupt +@c asprintf @mtslocale @ascuheap @acsmem +@c free dup @ascuheap @acsmem +This function prints a message describing the signal @var{signum} to the +standard error output stream @code{stderr}; see @ref{Standard Streams}. + +If you call @code{psignal} with a @var{message} that is either a null +pointer or an empty string, @code{psignal} just prints the message +corresponding to @var{signum}, adding a trailing newline. + +If you supply a non-null @var{message} argument, then @code{psignal} +prefixes its output with this string. It adds a colon and a space +character to separate the @var{message} from the string corresponding +to @var{signum}. + +@pindex signal.h +This function is a BSD feature, declared in the header file @file{signal.h}. +@end deftypefun + +@vindex sys_siglist +There is also an array @code{sys_siglist} which contains the messages +for the various signal codes. This array exists on BSD systems, unlike +@code{strsignal}. + +@node Signal Actions +@section Specifying Signal Actions +@cindex signal actions +@cindex establishing a handler + +The simplest way to change the action for a signal is to use the +@code{signal} function. You can specify a built-in action (such as to +ignore the signal), or you can @dfn{establish a handler}. 
+ +@Theglibc{} also implements the more versatile @code{sigaction} +facility. This section describes both facilities and gives suggestions +on which to use when. + +@menu +* Basic Signal Handling:: The simple @code{signal} function. +* Advanced Signal Handling:: The more powerful @code{sigaction} function. +* Signal and Sigaction:: How those two functions interact. +* Sigaction Function Example:: An example of using the sigaction function. +* Flags for Sigaction:: Specifying options for signal handling. +* Initial Signal Actions:: How programs inherit signal actions. +@end menu + +@node Basic Signal Handling +@subsection Basic Signal Handling +@cindex @code{signal} function + +The @code{signal} function provides a simple interface for establishing +an action for a particular signal. The function and associated macros +are declared in the header file @file{signal.h}. +@pindex signal.h + +@comment signal.h +@comment GNU +@deftp {Data Type} sighandler_t +This is the type of signal handler functions. Signal handlers take one +integer argument specifying the signal number, and have return type +@code{void}. So, you should define handler functions like this: + +@smallexample +void @var{handler} (int @code{signum}) @{ @dots{} @} +@end smallexample + +The name @code{sighandler_t} for this data type is a GNU extension. +@end deftp + +@comment signal.h +@comment ISO +@deftypefun sighandler_t signal (int @var{signum}, sighandler_t @var{action}) +@safety{@prelim{}@mtsafe{@mtssigintr{}}@assafe{}@acsafe{}} +@c signal ok +@c sigemptyset dup ok +@c sigaddset dup ok +@c sigismember dup ok +@c sigaction dup ok +The @code{signal} function establishes @var{action} as the action for +the signal @var{signum}. + +The first argument, @var{signum}, identifies the signal whose behavior +you want to control, and should be a signal number. 
The proper way to +specify a signal number is with one of the symbolic signal names +(@pxref{Standard Signals})---don't use an explicit number, because +the numerical code for a given kind of signal may vary from operating +system to operating system. + +The second argument, @var{action}, specifies the action to use for the +signal @var{signum}. This can be one of the following: + +@table @code +@item SIG_DFL +@vindex SIG_DFL +@cindex default action for a signal +@code{SIG_DFL} specifies the default action for the particular signal. +The default actions for various kinds of signals are stated in +@ref{Standard Signals}. + +@item SIG_IGN +@vindex SIG_IGN +@cindex ignore action for a signal +@code{SIG_IGN} specifies that the signal should be ignored. + +Your program generally should not ignore signals that represent serious +events or that are normally used to request termination. You cannot +ignore the @code{SIGKILL} or @code{SIGSTOP} signals at all. You can +ignore program error signals like @code{SIGSEGV}, but ignoring the error +won't enable the program to continue executing meaningfully. Ignoring +user requests such as @code{SIGINT}, @code{SIGQUIT}, and @code{SIGTSTP} +is unfriendly. + +When you do not wish signals to be delivered during a certain part of +the program, the thing to do is to block them, not ignore them. +@xref{Blocking Signals}. + +@item @var{handler} +Supply the address of a handler function in your program, to specify +running this handler as the way to deliver the signal. + +For more information about defining signal handler functions, +see @ref{Defining Handlers}. +@end table + +If you set the action for a signal to @code{SIG_IGN}, or if you set it +to @code{SIG_DFL} and the default action is to ignore that signal, then +any pending signals of that type are discarded (even if they are +blocked). 
Discarding the pending signals means that they will never be +delivered, not even if you subsequently specify another action and +unblock this kind of signal. + +The @code{signal} function returns the action that was previously in +effect for the specified @var{signum}. You can save this value and +restore it later by calling @code{signal} again. + +If @code{signal} can't honor the request, it returns @code{SIG_ERR} +instead. The following @code{errno} error conditions are defined for +this function: + +@table @code +@item EINVAL +You specified an invalid @var{signum}; or you tried to ignore or provide +a handler for @code{SIGKILL} or @code{SIGSTOP}. +@end table +@end deftypefun + +@strong{Compatibility Note:} A problem encountered when working with the +@code{signal} function is that it has different semantics on BSD and +SVID systems. The difference is that on SVID systems the signal handler +is deinstalled after signal delivery. On BSD systems the +handler must be explicitly deinstalled. In @theglibc{} we use the +BSD version by default. To use the SVID version you can either use the +function @code{sysv_signal} (see below) or use the @code{_XOPEN_SOURCE} +feature test macro (@pxref{Feature Test Macros}). In general, use of these +functions should be avoided because of compatibility problems. It +is better to use @code{sigaction} if it is available since the results +are much more reliable. 
+ +Here is a simple example of setting up a handler to delete temporary +files when certain fatal signals happen: + +@smallexample +#include <signal.h> + +void +termination_handler (int signum) +@{ + struct temp_file *p; + + for (p = temp_file_list; p; p = p->next) + unlink (p->name); +@} + +int +main (void) +@{ + @dots{} + if (signal (SIGINT, termination_handler) == SIG_IGN) + signal (SIGINT, SIG_IGN); + if (signal (SIGHUP, termination_handler) == SIG_IGN) + signal (SIGHUP, SIG_IGN); + if (signal (SIGTERM, termination_handler) == SIG_IGN) + signal (SIGTERM, SIG_IGN); + @dots{} +@} +@end smallexample + +@noindent +Note that if a given signal was previously set to be ignored, this code +avoids altering that setting. This is because non-job-control shells +often ignore certain signals when starting children, and it is important +for the children to respect this. + +We do not handle @code{SIGQUIT} or the program error signals in this +example because these are designed to provide information for debugging +(a core dump), and the temporary files may give useful information. + +@comment signal.h +@comment GNU +@deftypefun sighandler_t sysv_signal (int @var{signum}, sighandler_t @var{action}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c sysv_signal ok +@c sigemptyset dup ok +@c sigaction dup ok +The @code{sysv_signal} function implements the behavior of the standard +@code{signal} function as found on SVID systems. The difference from BSD +systems is that the handler is deinstalled after a delivery of a signal. + +@strong{Compatibility Note:} As said above for @code{signal}, this +function should be avoided when possible. @code{sigaction} is the +preferred method. +@end deftypefun + +@comment signal.h +@comment SVID +@deftypefun sighandler_t ssignal (int @var{signum}, sighandler_t @var{action}) +@safety{@prelim{}@mtsafe{@mtssigintr{}}@assafe{}@acsafe{}} +@c Aliases signal and bsd_signal. 
+The @code{ssignal} function does the same thing as @code{signal}; it is +provided only for compatibility with SVID. +@end deftypefun + +@comment signal.h +@comment ISO +@deftypevr Macro sighandler_t SIG_ERR +The value of this macro is used as the return value from @code{signal} +to indicate an error. +@end deftypevr + +@ignore +@comment RMS says that ``we don't do this''. +Implementations might define additional macros for built-in signal +actions that are suitable as a @var{action} argument to @code{signal}, +besides @code{SIG_IGN} and @code{SIG_DFL}. Identifiers whose names +begin with @samp{SIG_} followed by an uppercase letter are reserved for +this purpose. +@end ignore + + +@node Advanced Signal Handling +@subsection Advanced Signal Handling +@cindex @code{sigaction} function + +The @code{sigaction} function has the same basic effect as +@code{signal}: to specify how a signal should be handled by the process. +However, @code{sigaction} offers more control, at the expense of more +complexity. In particular, @code{sigaction} allows you to specify +additional flags to control when the signal is generated and how the +handler is invoked. + +The @code{sigaction} function is declared in @file{signal.h}. +@pindex signal.h + +@comment signal.h +@comment POSIX.1 +@deftp {Data Type} {struct sigaction} +Structures of type @code{struct sigaction} are used in the +@code{sigaction} function to specify all the information about how to +handle a particular signal. This structure contains at least the +following members: + +@table @code +@item sighandler_t sa_handler +This is used in the same way as the @var{action} argument to the +@code{signal} function. The value can be @code{SIG_DFL}, +@code{SIG_IGN}, or a function pointer. @xref{Basic Signal Handling}. + +@item sigset_t sa_mask +This specifies a set of signals to be blocked while the handler runs. +Blocking is explained in @ref{Blocking for Handler}. 
Note that the +signal that was delivered is automatically blocked by default before its +handler is started; this is true regardless of the value in +@code{sa_mask}. If you want that signal not to be blocked within its +handler, you must write code in the handler to unblock it. + +@item int sa_flags +This specifies various flags which can affect the behavior of +the signal. These are described in more detail in @ref{Flags for Sigaction}. +@end table +@end deftp + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigaction (int @var{signum}, const struct sigaction *restrict @var{action}, struct sigaction *restrict @var{old-action}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @var{action} argument is used to set up a new action for the signal +@var{signum}, while the @var{old-action} argument is used to return +information about the action previously associated with this signal. +(In other words, @var{old-action} has the same purpose as the +@code{signal} function's return value---you can check to see what the +old action in effect for the signal was, and restore it later if you +want.) + +Either @var{action} or @var{old-action} can be a null pointer. If +@var{old-action} is a null pointer, this simply suppresses the return +of information about the old action. If @var{action} is a null pointer, +the action associated with the signal @var{signum} is unchanged; this +allows you to inquire about how a signal is being handled without changing +that handling. + +The return value from @code{sigaction} is zero if it succeeds, and +@code{-1} on failure. The following @code{errno} error conditions are +defined for this function: + +@table @code +@item EINVAL +The @var{signum} argument is not valid, or you are trying to +trap or ignore @code{SIGKILL} or @code{SIGSTOP}. 
+@end table +@end deftypefun + +@node Signal and Sigaction +@subsection Interaction of @code{signal} and @code{sigaction} + +It's possible to use both the @code{signal} and @code{sigaction} +functions within a single program, but you have to be careful because +they can interact in slightly strange ways. + +The @code{sigaction} function specifies more information than the +@code{signal} function, so the return value from @code{signal} cannot +express the full range of @code{sigaction} possibilities. Therefore, if +you use @code{signal} to save and later reestablish an action, it may +not be able to reestablish properly a handler that was established with +@code{sigaction}. + +To avoid having problems as a result, always use @code{sigaction} to +save and restore a handler if your program uses @code{sigaction} at all. +Since @code{sigaction} is more general, it can properly save and +reestablish any action, regardless of whether it was established +originally with @code{signal} or @code{sigaction}. + +On some systems if you establish an action with @code{signal} and then +examine it with @code{sigaction}, the handler address that you get may +not be the same as what you specified with @code{signal}. It may not +even be suitable for use as an action argument with @code{signal}. But +you can rely on using it as an argument to @code{sigaction}. This +problem never happens on @gnusystems{}. + +So, you're better off using one or the other of the mechanisms +consistently within a single program. + +@strong{Portability Note:} The basic @code{signal} function is a feature +of @w{ISO C}, while @code{sigaction} is part of the POSIX.1 standard. If +you are concerned about portability to non-POSIX systems, then you +should use the @code{signal} function instead. + +@node Sigaction Function Example +@subsection @code{sigaction} Function Example + +In @ref{Basic Signal Handling}, we gave an example of establishing a +simple handler for termination signals using @code{signal}. 
Here is an +equivalent example using @code{sigaction}: + +@smallexample +#include <signal.h> + +void +termination_handler (int signum) +@{ + struct temp_file *p; + + for (p = temp_file_list; p; p = p->next) + unlink (p->name); +@} + +int +main (void) +@{ + @dots{} + struct sigaction new_action, old_action; + + /* @r{Set up the structure to specify the new action.} */ + new_action.sa_handler = termination_handler; + sigemptyset (&new_action.sa_mask); + new_action.sa_flags = 0; + + sigaction (SIGINT, NULL, &old_action); + if (old_action.sa_handler != SIG_IGN) + sigaction (SIGINT, &new_action, NULL); + sigaction (SIGHUP, NULL, &old_action); + if (old_action.sa_handler != SIG_IGN) + sigaction (SIGHUP, &new_action, NULL); + sigaction (SIGTERM, NULL, &old_action); + if (old_action.sa_handler != SIG_IGN) + sigaction (SIGTERM, &new_action, NULL); + @dots{} +@} +@end smallexample + +The program just loads the @code{new_action} structure with the desired +parameters and passes it in the @code{sigaction} call. The usage of +@code{sigemptyset} is described later; see @ref{Blocking Signals}. + +As in the example using @code{signal}, we avoid handling signals +previously set to be ignored. Here we can avoid altering the signal +handler even momentarily, by using the feature of @code{sigaction} that +lets us examine the current action without specifying a new one. + +Here is another example. It retrieves information about the current +action for @code{SIGINT} without changing that action. 
+ +@smallexample +struct sigaction query_action; + +if (sigaction (SIGINT, NULL, &query_action) < 0) + /* @r{@code{sigaction} returns -1 in case of error.} */ +else if (query_action.sa_handler == SIG_DFL) + /* @r{@code{SIGINT} is handled in the default, fatal manner.} */ +else if (query_action.sa_handler == SIG_IGN) + /* @r{@code{SIGINT} is ignored.} */ +else + /* @r{A programmer-defined signal handler is in effect.} */ +@end smallexample + +@node Flags for Sigaction +@subsection Flags for @code{sigaction} +@cindex signal flags +@cindex flags for @code{sigaction} +@cindex @code{sigaction} flags + +The @code{sa_flags} member of the @code{sigaction} structure is a +catch-all for special features. Most of the time, @code{SA_RESTART} is +a good value to use for this field. + +The value of @code{sa_flags} is interpreted as a bit mask. Thus, you +should choose the flags you want to set, @sc{or} those flags together, +and store the result in the @code{sa_flags} member of your +@code{sigaction} structure. + +Each signal number has its own set of flags. Each call to +@code{sigaction} affects one particular signal number, and the flags +that you specify apply only to that particular signal. + +In @theglibc{}, establishing a handler with @code{signal} sets all +the flags to zero except for @code{SA_RESTART}, whose value depends on +the settings you have made with @code{siginterrupt}. @xref{Interrupted +Primitives}, to see what this is about. + +@pindex signal.h +These macros are defined in the header file @file{signal.h}. + +@comment signal.h +@comment POSIX.1 +@deftypevr Macro int SA_NOCLDSTOP +This flag is meaningful only for the @code{SIGCHLD} signal. When the +flag is set, the system delivers the signal for a terminated child +process but not for one that is stopped. By default, @code{SIGCHLD} is +delivered for both terminated children and stopped children. + +Setting this flag for a signal other than @code{SIGCHLD} has no effect. 
+@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SA_ONSTACK +If this flag is set for a particular signal number, the system uses the +signal stack when delivering that kind of signal. @xref{Signal Stack}. +If a signal with this flag arrives and you have not set a signal stack, +the system terminates the program with @code{SIGILL}. +@end deftypevr + +@comment signal.h +@comment BSD +@deftypevr Macro int SA_RESTART +This flag controls what happens when a signal is delivered during +certain primitives (such as @code{open}, @code{read} or @code{write}), +and the signal handler returns normally. There are two alternatives: +the library function can resume, or it can return failure with error +code @code{EINTR}. + +The choice is controlled by the @code{SA_RESTART} flag for the +particular kind of signal that was delivered. If the flag is set, +returning from a handler resumes the library function. If the flag is +clear, returning from a handler makes the function fail. +@xref{Interrupted Primitives}. +@end deftypevr + +@node Initial Signal Actions +@subsection Initial Signal Actions +@cindex initial signal actions + +When a new process is created (@pxref{Creating a Process}), it inherits +handling of signals from its parent process. However, when you load a +new process image using the @code{exec} function (@pxref{Executing a +File}), any signals that you've defined your own handlers for revert to +their @code{SIG_DFL} handling. (If you think about it a little, this +makes sense; the handler functions from the old program are specific to +that program, and aren't even present in the address space of the new +program image.) Of course, the new program can establish its own +handlers. + +When a program is run by a shell, the shell normally sets the initial +actions for the child process to @code{SIG_DFL} or @code{SIG_IGN}, as +appropriate. 
It's a good idea to check to make sure that the shell has +not set up an initial action of @code{SIG_IGN} before you establish your +own signal handlers. + +Here is an example of how to establish a handler for @code{SIGHUP}, but +not if @code{SIGHUP} is currently ignored: + +@smallexample +@group +@dots{} +struct sigaction temp; + +sigaction (SIGHUP, NULL, &temp); + +if (temp.sa_handler != SIG_IGN) + @{ + temp.sa_handler = handle_sighup; + sigemptyset (&temp.sa_mask); + sigaction (SIGHUP, &temp, NULL); + @} +@end group +@end smallexample + +@node Defining Handlers +@section Defining Signal Handlers +@cindex signal handler function + +This section describes how to write a signal handler function that can +be established with the @code{signal} or @code{sigaction} functions. + +A signal handler is just a function that you compile together with the +rest of the program. Instead of directly invoking the function, you use +@code{signal} or @code{sigaction} to tell the operating system to call +it when a signal arrives. This is known as @dfn{establishing} the +handler. @xref{Signal Actions}. + +There are two basic strategies you can use in signal handler functions: + +@itemize @bullet +@item +You can have the handler function note that the signal arrived by +tweaking some global data structures, and then return normally. + +@item +You can have the handler function terminate the program or transfer +control to a point where it can recover from the situation that caused +the signal. +@end itemize + +You need to take special care in writing handler functions because they +can be called asynchronously. That is, a handler might be called at any +point in the program, unpredictably. If two signals arrive during a +very short interval, one handler can run within another. This section +describes what your handler should do, and what you should avoid. + +@menu +* Handler Returns:: Handlers that return normally, and what + this means. 
+* Termination in Handler:: How handler functions terminate a program. +* Longjmp in Handler:: Nonlocal transfer of control out of a + signal handler. +* Signals in Handler:: What happens when signals arrive while + the handler is already occupied. +* Merged Signals:: When a second signal arrives before the + first is handled. +* Nonreentrancy:: Do not call any functions unless you know they + are reentrant with respect to signals. +* Atomic Data Access:: A single handler can run in the middle of + reading or writing a single object. +@end menu + +@node Handler Returns +@subsection Signal Handlers that Return + +Handlers which return normally are usually used for signals such as +@code{SIGALRM} and the I/O and interprocess communication signals. But +a handler for @code{SIGINT} might also return normally after setting a +flag that tells the program to exit at a convenient time. + +It is not safe to return normally from the handler for a program error +signal, because the behavior of the program when the handler function +returns is not defined after a program error. @xref{Program Error +Signals}. + +Handlers that return normally must modify some global variable in order +to have any effect. Typically, the variable is one that is examined +periodically by the program during normal operation. Its data type +should be @code{sig_atomic_t} for reasons described in @ref{Atomic +Data Access}. + +Here is a simple example of such a program. It executes the body of +the loop until it has noticed that a @code{SIGALRM} signal has arrived. +This technique is useful because it allows the iteration in progress +when the signal arrives to complete before the loop exits. + +@smallexample +@include sigh1.c.texi +@end smallexample + +@node Termination in Handler +@subsection Handlers That Terminate the Process + +Handler functions that terminate the program are typically used to cause +orderly cleanup or recovery from program error signals and interactive +interrupts. 
+ +The cleanest way for a handler to terminate the process is to raise the +same signal that ran the handler in the first place. Here is how to do +this: + +@smallexample +volatile sig_atomic_t fatal_error_in_progress = 0; + +void +fatal_error_signal (int sig) +@{ +@group + /* @r{Since this handler is established for more than one kind of signal, } + @r{it might still get invoked recursively by delivery of some other kind} + @r{of signal. Use a static variable to keep track of that.} */ + if (fatal_error_in_progress) + raise (sig); + fatal_error_in_progress = 1; +@end group + +@group + /* @r{Now do the clean up actions:} + @r{- reset terminal modes} + @r{- kill child processes} + @r{- remove lock files} */ + @dots{} +@end group + +@group + /* @r{Now reraise the signal. We reactivate the signal's} + @r{default handling, which is to terminate the process.} + @r{We could just call @code{exit} or @code{abort},} + @r{but reraising the signal sets the return status} + @r{from the process correctly.} */ + signal (sig, SIG_DFL); + raise (sig); +@} +@end group +@end smallexample + +@node Longjmp in Handler +@subsection Nonlocal Control Transfer in Handlers +@cindex non-local exit, from signal handler + +You can do a nonlocal transfer of control out of a signal handler using +the @code{setjmp} and @code{longjmp} facilities (@pxref{Non-Local +Exits}). + +When the handler does a nonlocal control transfer, the part of the +program that was running will not continue. If this part of the program +was in the middle of updating an important data structure, the data +structure will remain inconsistent. Since the program does not +terminate, the inconsistency is likely to be noticed later on. + +There are two ways to avoid this problem. One is to block the signal +for the parts of the program that update important data structures. +Blocking the signal delays its delivery until it is unblocked, once the +critical updating is finished. @xref{Blocking Signals}. 
+ +The other way is to re-initialize the crucial data structures in the +signal handler, or to make their values consistent. + +Here is a rather schematic example showing the reinitialization of one +global variable. + +@smallexample +@group +#include <signal.h> +#include <setjmp.h> + +jmp_buf return_to_top_level; + +volatile sig_atomic_t waiting_for_input; + +void +handle_sigint (int signum) +@{ + /* @r{We may have been waiting for input when the signal arrived,} + @r{but we are no longer waiting once we transfer control.} */ + waiting_for_input = 0; + longjmp (return_to_top_level, 1); +@} +@end group + +@group +int +main (void) +@{ + @dots{} + signal (SIGINT, handle_sigint); + @dots{} + while (1) @{ + prepare_for_command (); + if (setjmp (return_to_top_level) == 0) + read_and_execute_command (); + @} +@} +@end group + +@group +/* @r{Imagine this is a subroutine used by various commands.} */ +char * +read_data () +@{ + if (input_from_terminal) @{ + waiting_for_input = 1; + @dots{} + waiting_for_input = 0; + @} else @{ + @dots{} + @} +@} +@end group +@end smallexample + + +@node Signals in Handler +@subsection Signals Arriving While a Handler Runs +@cindex race conditions, relating to signals + +What happens if another signal arrives while your signal handler +function is running? + +When the handler for a particular signal is invoked, that signal is +automatically blocked until the handler returns. That means that if two +signals of the same kind arrive close together, the second one will be +held until the first has been handled. (The handler can explicitly +unblock the signal using @code{sigprocmask}, if you want to allow more +signals of this type to arrive; see @ref{Process Signal Mask}.) + +However, your handler can still be interrupted by delivery of another +kind of signal. 
To avoid this, you can use the @code{sa_mask} member of +the action structure passed to @code{sigaction} to explicitly specify +which signals should be blocked while the signal handler runs. These +signals are in addition to the signal for which the handler was invoked, +and any other signals that are normally blocked by the process. +@xref{Blocking for Handler}. + +When the handler returns, the set of blocked signals is restored to the +value it had before the handler ran. So using @code{sigprocmask} inside +the handler only affects what signals can arrive during the execution of +the handler itself, not what signals can arrive once the handler returns. + +@strong{Portability Note:} Always use @code{sigaction} to establish a +handler for a signal that you expect to receive asynchronously, if you +want your program to work properly on System V Unix. On this system, +the handling of a signal whose handler was established with +@code{signal} automatically sets the signal's action back to +@code{SIG_DFL}, and the handler must re-establish itself each time it +runs. This practice, while inconvenient, does work when signals cannot +arrive in succession. However, if another signal can arrive right away, +it may arrive before the handler can re-establish itself. Then the +second signal would receive the default handling, which could terminate +the process. + +@node Merged Signals +@subsection Signals Close Together Merge into One +@cindex handling multiple signals +@cindex successive signals +@cindex merging of signals + +If multiple signals of the same type are delivered to your process +before your signal handler has a chance to be invoked at all, the +handler may only be invoked once, as if only a single signal had +arrived. In effect, the signals merge into one. This situation can +arise when the signal is blocked, or in a multiprocessing environment +where the system is busy running some other processes while the signals +are delivered. 
This means, for example, that you cannot reliably use a +signal handler to count signals. The only distinction you can reliably +make is whether at least one signal has arrived since a given time in +the past. + +Here is an example of a handler for @code{SIGCHLD} that compensates for +the fact that the number of signals received may not equal the number of +child processes that generate them. It assumes that the program keeps track +of all the child processes with a chain of structures as follows: + +@smallexample +struct process +@{ + struct process *next; + /* @r{The process ID of this child.} */ + int pid; + /* @r{The descriptor of the pipe or pseudo terminal} + @r{on which output comes from this child.} */ + int input_descriptor; + /* @r{Nonzero if this process has stopped or terminated.} */ + sig_atomic_t have_status; + /* @r{The status of this child; 0 if running,} + @r{otherwise a status value from @code{waitpid}.} */ + int status; +@}; + +struct process *process_list; +@end smallexample + +This example also uses a flag to indicate whether signals have arrived +since some time in the past---whenever the program last cleared it to +zero. 
+ +@smallexample +/* @r{Nonzero means some child's status has changed} + @r{so look at @code{process_list} for the details.} */ +int process_status_change; +@end smallexample + +Here is the handler itself: + +@smallexample +void +sigchld_handler (int signo) +@{ + int old_errno = errno; + + while (1) @{ + register int pid; + int w; + struct process *p; + + /* @r{Keep asking for a status until we get a definitive result.} */ + do + @{ + errno = 0; + pid = waitpid (WAIT_ANY, &w, WNOHANG | WUNTRACED); + @} + while (pid <= 0 && errno == EINTR); + + if (pid <= 0) @{ + /* @r{A real failure means there are no more} + @r{stopped or terminated child processes, so return.} */ + errno = old_errno; + return; + @} + + /* @r{Find the process that signaled us, and record its status.} */ + + for (p = process_list; p; p = p->next) + if (p->pid == pid) @{ + p->status = w; + /* @r{Indicate that the @code{status} field} + @r{has data to look at. We do this only after storing it.} */ + p->have_status = 1; + + /* @r{If process has terminated, stop waiting for its output.} */ + if (WIFSIGNALED (w) || WIFEXITED (w)) + if (p->input_descriptor) + FD_CLR (p->input_descriptor, &input_wait_mask); + + /* @r{The program should check this flag from time to time} + @r{to see if there is any news in @code{process_list}.} */ + ++process_status_change; + @} + + /* @r{Loop around to handle all the processes} + @r{that have something to tell us.} */ + @} +@} +@end smallexample + +Here is the proper way to check the flag @code{process_status_change}: + +@smallexample +if (process_status_change) @{ + struct process *p; + process_status_change = 0; + for (p = process_list; p; p = p->next) + if (p->have_status) @{ + @dots{} @r{Examine @code{p->status}} @dots{} + @} +@} +@end smallexample + +@noindent +It is vital to clear the flag before examining the list; otherwise, if a +signal were delivered just before the clearing of the flag, and after +the appropriate element of the process list had been checked, 
the status +change would go unnoticed until the next signal arrived to set the flag +again. You could, of course, avoid this problem by blocking the signal +while scanning the list, but it is much more elegant to guarantee +correctness by doing things in the right order. + +The loop which checks process status avoids examining @code{p->status} +until it sees that status has been validly stored. This is to make sure +that the status cannot change in the middle of accessing it. Once +@code{p->have_status} is set, it means that the child process is stopped +or terminated, and in either case, it cannot stop or terminate again +until the program has taken notice. @xref{Atomic Usage}, for more +information about coping with interruptions during accesses of a +variable. + +Here is another way you can test whether the handler has run since the +last time you checked. This technique uses a counter which is never +changed outside the handler. Instead of clearing the count, the program +remembers the previous value and sees whether it has changed since the +previous check. The advantage of this method is that different parts of +the program can check independently, each part checking whether there +has been a signal since that part last checked. + +@smallexample +sig_atomic_t process_status_change; + +sig_atomic_t last_process_status_change; + +@dots{} +@{ + sig_atomic_t prev = last_process_status_change; + last_process_status_change = process_status_change; + if (last_process_status_change != prev) @{ + struct process *p; + for (p = process_list; p; p = p->next) + if (p->have_status) @{ + @dots{} @r{Examine @code{p->status}} @dots{} + @} + @} +@} +@end smallexample + +@node Nonreentrancy +@subsection Signal Handling and Nonreentrant Functions +@cindex restrictions on signal handler functions + +Handler functions usually don't do very much. 
The best practice is to +write a handler that does nothing but set an external variable that the +program checks regularly, and leave all serious work to the program. +This is best because the handler can be called asynchronously, at +unpredictable times---perhaps in the middle of a primitive function, or +even between the beginning and the end of a C operator that requires +multiple instructions. The data structures being manipulated might +therefore be in an inconsistent state when the handler function is +invoked. Even copying one @code{int} variable into another can take two +instructions on most machines. + +This means you have to be very careful about what you do in a signal +handler. + +@itemize @bullet +@item +@cindex @code{volatile} declarations +If your handler needs to access any global variables from your program, +declare those variables @code{volatile}. This tells the compiler that +the value of the variable might change asynchronously, and inhibits +certain optimizations that would be invalidated by such modifications. + +@item +@cindex reentrant functions +If you call a function in the handler, make sure it is @dfn{reentrant} +with respect to signals, or else make sure that the signal cannot +interrupt a call to a related function. +@end itemize + +A function can be non-reentrant if it uses memory that is not on the +stack. + +@itemize @bullet +@item +If a function uses a static variable or a global variable, or a +dynamically-allocated object that it finds for itself, then it is +non-reentrant and any two calls to the function can interfere. + +For example, suppose that the signal handler uses @code{gethostbyname}. +This function returns its value in a static object, reusing the same +object each time. If the signal happens to arrive during a call to +@code{gethostbyname}, or even after one (while the program is still +using the value), it will clobber the value that the program asked for. 
+ +However, if the program does not use @code{gethostbyname} or any other +function that returns information in the same object, or if it always +blocks signals around each use, then you are safe. + +There are a large number of library functions that return values in a +fixed object, always reusing the same object in this fashion, and all of +them cause the same problem. Function descriptions in this manual +always mention this behavior. + +@item +If a function uses and modifies an object that you supply, then it is +potentially non-reentrant; two calls can interfere if they use the same +object. + +This case arises when you do I/O using streams. Suppose that the +signal handler prints a message with @code{fprintf}. Suppose that the +program was in the middle of an @code{fprintf} call using the same +stream when the signal was delivered. Both the signal handler's message +and the program's data could be corrupted, because both calls operate on +the same data structure---the stream itself. + +However, if you know that the stream that the handler uses cannot +possibly be used by the program at a time when signals can arrive, then +you are safe. It is no problem if the program uses some other stream. + +@item +On most systems, @code{malloc} and @code{free} are not reentrant, +because they use a static data structure which records what memory +blocks are free. As a result, no library functions that allocate or +free memory are reentrant. This includes functions that allocate space +to store a result. + +The best way to avoid the need to allocate memory in a handler is to +allocate in advance space for signal handlers to use. + +The best way to avoid freeing memory in a handler is to flag or record +the objects to be freed, and have the program check from time to time +whether anything is waiting to be freed. 
But this must be done with +care, because placing an object on a chain is not atomic, and if it is +interrupted by another signal handler that does the same thing, you +could ``lose'' one of the objects. + +@ignore +!!! not true +In @theglibc{}, @code{malloc} and @code{free} are safe to use in +signal handlers because they block signals. As a result, the library +functions that allocate space for a result are also safe in signal +handlers. The obstack allocation functions are safe as long as you +don't use the same obstack both inside and outside of a signal handler. +@end ignore + +@ignore +@comment Once we have r_alloc again add this paragraph. +The relocating allocation functions (@pxref{Relocating Allocator}) +are certainly not safe to use in a signal handler. +@end ignore + +@item +Any function that modifies @code{errno} is non-reentrant, but you can +correct for this: in the handler, save the original value of +@code{errno} and restore it before returning normally. This prevents +errors that occur within the signal handler from being confused with +errors from system calls at the point the program is interrupted to run +the handler. + +This technique is generally applicable; if you want to call in a handler +a function that modifies a particular object in memory, you can make +this safe by saving and restoring that object. + +@item +Merely reading from a memory object is safe provided that you can deal +with any of the values that might appear in the object at a time when +the signal can be delivered. Keep in mind that assignment to some data +types requires more than one instruction, which means that the handler +could run ``in the middle of'' an assignment to the variable if its type +is not atomic. @xref{Atomic Data Access}. + +@item +Merely writing into a memory object is safe as long as a sudden change +in the value, at any time when the handler might run, will not disturb +anything. 
+@end itemize + +@node Atomic Data Access +@subsection Atomic Data Access and Signal Handling + +Whether the data in your application concerns atoms, or mere text, you +have to be careful about the fact that access to a single datum is not +necessarily @dfn{atomic}. This means that it can take more than one +instruction to read or write a single object. In such cases, a signal +handler might be invoked in the middle of reading or writing the object. + +There are three ways you can cope with this problem. You can use data +types that are always accessed atomically; you can carefully arrange +that nothing untoward happens if an access is interrupted; or you can +block all signals around any access that had better not be interrupted +(@pxref{Blocking Signals}). + +@menu +* Non-atomic Example:: A program illustrating interrupted access. +* Types: Atomic Types. Data types that guarantee no interruption. +* Usage: Atomic Usage. Proving that interruption is harmless. +@end menu + +@node Non-atomic Example +@subsubsection Problems with Non-Atomic Access + +Here is an example which shows what can happen if a signal handler runs +in the middle of modifying a variable. (Interrupting the reading of a +variable can also lead to paradoxical results, but here we only show +writing.) + +@smallexample +#include <signal.h> +#include <stdio.h> + +volatile struct two_words @{ int a, b; @} memory; + +void +handler (int signum) +@{ + printf ("%d,%d\n", memory.a, memory.b); + alarm (1); +@} + +@group +int +main (void) +@{ + static struct two_words zeros = @{ 0, 0 @}, ones = @{ 1, 1 @}; + signal (SIGALRM, handler); + memory = zeros; + alarm (1); + while (1) + @{ + memory = zeros; + memory = ones; + @} +@} +@end group +@end smallexample + +This program fills @code{memory} with zeros, ones, zeros, ones, +alternating forever; meanwhile, once per second, the alarm signal handler +prints the current contents. 
(Calling @code{printf} in the handler is +safe in this program because it is certainly not being called outside +the handler when the signal happens.) + +Clearly, this program can print a pair of zeros or a pair of ones. But +that's not all it can do! On most machines, it takes several +instructions to store a new value in @code{memory}, and the value is +stored one word at a time. If the signal is delivered in between these +instructions, the handler might find that @code{memory.a} is zero and +@code{memory.b} is one (or vice versa). + +On some machines it may be possible to store a new value in +@code{memory} with just one instruction that cannot be interrupted. On +these machines, the handler will always print two zeros or two ones. + +@node Atomic Types +@subsubsection Atomic Types + +To avoid uncertainty about interrupting access to a variable, you can +use a particular data type for which access is always atomic: +@code{sig_atomic_t}. Reading and writing this data type is guaranteed +to happen in a single instruction, so there's no way for a handler to +run ``in the middle'' of an access. + +The type @code{sig_atomic_t} is always an integer data type, but which +one it is, and how many bits it contains, may vary from machine to +machine. + +@comment signal.h +@comment ISO +@deftp {Data Type} sig_atomic_t +This is an integer data type. Objects of this type are always accessed +atomically. +@end deftp + +In practice, you can assume that @code{int} is atomic. +You can also assume that pointer +types are atomic; that is very convenient. Both of these assumptions +are true on all of the machines that @theglibc{} supports and on +all POSIX systems we know of. +@c ??? This might fail on a 386 that uses 64-bit pointers. + +@node Atomic Usage +@subsubsection Atomic Usage Patterns + +Certain patterns of access avoid any problem even if an access is +interrupted. 
For example, a flag which is set by the handler, and +tested and cleared by the main program from time to time, is always safe +even if access actually requires two instructions. To show that this is +so, we must consider each access that could be interrupted, and show +that there is no problem if it is interrupted. + +An interrupt in the middle of testing the flag is safe because either it's +recognized to be nonzero, in which case the precise value doesn't +matter, or it will be seen to be nonzero the next time it's tested. + +An interrupt in the middle of clearing the flag is no problem because +either the value ends up zero, which is what happens if a signal comes +in just before the flag is cleared, or the value ends up nonzero, and +subsequent events occur as if the signal had come in just after the flag +was cleared. As long as the code handles both of these cases properly, +it can also handle a signal in the middle of clearing the flag. (This +is an example of the sort of reasoning you need to do to figure out +whether non-atomic usage is safe.) + +Sometimes you can ensure uninterrupted access to one object by +protecting its use with another object, perhaps one whose type +guarantees atomicity. @xref{Merged Signals}, for an example. + +@node Interrupted Primitives +@section Primitives Interrupted by Signals + +A signal can arrive and be handled while an I/O primitive such as +@code{open} or @code{read} is waiting for an I/O device. If the signal +handler returns, the system faces the question: what should happen next? + +POSIX specifies one approach: make the primitive fail right away. The +error code for this kind of failure is @code{EINTR}. This is flexible, +but usually inconvenient. Typically, POSIX applications that use signal +handlers must check for @code{EINTR} after each library function that +can return it, in order to try the call again. Often programmers forget +to check, which is a common source of error. 
+ +@Theglibc{} provides a convenient way to retry a call after a +temporary failure, with the macro @code{TEMP_FAILURE_RETRY}: + +@comment unistd.h +@comment GNU +@defmac TEMP_FAILURE_RETRY (@var{expression}) +This macro evaluates @var{expression} once, and examines its value as +type @code{long int}. If the value equals @code{-1}, that indicates a +failure and @code{errno} should be set to show what kind of failure. +If it fails and reports error code @code{EINTR}, +@code{TEMP_FAILURE_RETRY} evaluates it again, and over and over until +the result is not a temporary failure. + +The value returned by @code{TEMP_FAILURE_RETRY} is whatever value +@var{expression} produced. +@end defmac + +BSD avoids @code{EINTR} entirely and provides a more convenient +approach: to restart the interrupted primitive, instead of making it +fail. If you choose this approach, you need not be concerned with +@code{EINTR}. + +You can choose either approach with @theglibc{}. If you use +@code{sigaction} to establish a signal handler, you can specify how that +handler should behave. If you specify the @code{SA_RESTART} flag, +return from that handler will resume a primitive; otherwise, return from +that handler will cause @code{EINTR}. @xref{Flags for Sigaction}. + +Another way to specify the choice is with the @code{siginterrupt} +function. @xref{BSD Signal Handling}. + +When you don't specify with @code{sigaction} or @code{siginterrupt} what +a particular handler should do, it uses a default choice. The default +choice in @theglibc{} is to make primitives fail with @code{EINTR}. +@cindex EINTR, and restarting interrupted primitives +@cindex restarting interrupted primitives +@cindex interrupting primitives +@cindex primitives, interrupting +@c !!! want to have @cindex system calls @i{see} primitives [no page #] + +The description of each primitive affected by this issue +lists @code{EINTR} among the error codes it can return. 
+ +There is one situation where resumption never happens no matter which +choice you make: when a data-transfer function such as @code{read} or +@code{write} is interrupted by a signal after transferring part of the +data. In this case, the function returns the number of bytes already +transferred, indicating partial success. + +This might at first appear to cause unreliable behavior on +record-oriented devices (including datagram sockets; @pxref{Datagrams}), +where splitting one @code{read} or @code{write} into two would read or +write two records. Actually, there is no problem, because interruption +after a partial transfer cannot happen on such devices; they always +transfer an entire record in one burst, with no waiting once data +transfer has started. + +@node Generating Signals +@section Generating Signals +@cindex sending signals +@cindex raising signals +@cindex signals, generating + +Besides signals that are generated as a result of a hardware trap or +interrupt, your program can explicitly send signals to itself or to +another process. + +@menu +* Signaling Yourself:: A process can send a signal to itself. +* Signaling Another Process:: Send a signal to another process. +* Permission for kill:: Permission for using @code{kill}. +* Kill Example:: Using @code{kill} for Communication. +@end menu + +@node Signaling Yourself +@subsection Signaling Yourself + +A process can send itself a signal with the @code{raise} function. This +function is declared in @file{signal.h}. +@pindex signal.h + +@comment signal.h +@comment ISO +@deftypefun int raise (int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c raise ok +@c [posix] +@c getpid dup ok +@c kill dup ok +@c [linux] +@c syscall(gettid) ok +@c syscall(tgkill) ok +The @code{raise} function sends the signal @var{signum} to the calling +process. It returns zero if successful and a nonzero value if it fails. +About the only reason for failure would be if the value of @var{signum} +is invalid. 
+@end deftypefun + +@comment signal.h +@comment SVID +@deftypefun int gsignal (int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Aliases raise. +The @code{gsignal} function does the same thing as @code{raise}; it is +provided only for compatibility with SVID. +@end deftypefun + +One convenient use for @code{raise} is to reproduce the default behavior +of a signal that you have trapped. For instance, suppose a user of your +program types the SUSP character (usually @kbd{C-z}; @pxref{Special +Characters}) to send it an interactive stop signal +(@code{SIGTSTP}), and you want to clean up some internal data buffers +before stopping. You might set this up like this: + +@comment RMS suggested getting rid of the handler for SIGCONT in this function. +@comment But that would require that the handler for SIGTSTP unblock the +@comment signal before doing the call to raise. We haven't covered that +@comment topic yet, and I don't want to distract from the main point of +@comment the example with a digression to explain what is going on. As +@comment the example is written, the signal that is raise'd will be delivered +@comment as soon as the SIGTSTP handler returns, which is fine. + +@smallexample +#include <signal.h> + +/* @r{When a stop signal arrives, set the action back to the default + and then resend the signal after doing cleanup actions.} */ + +void +tstp_handler (int sig) +@{ + signal (SIGTSTP, SIG_DFL); + /* @r{Do cleanup actions here.} */ + @dots{} + raise (SIGTSTP); +@} + +/* @r{When the process is continued again, restore the signal handler.} */ + +void +cont_handler (int sig) +@{ + signal (SIGCONT, cont_handler); + signal (SIGTSTP, tstp_handler); +@} + +@group +/* @r{Enable both handlers during program initialization.} */ + +int +main (void) +@{ + signal (SIGCONT, cont_handler); + signal (SIGTSTP, tstp_handler); + @dots{} +@} +@end group +@end smallexample + +@strong{Portability note:} @code{raise} was invented by the @w{ISO C} +committee. 
Older systems may not support it, so using @code{kill} may +be more portable. @xref{Signaling Another Process}. + +@node Signaling Another Process +@subsection Signaling Another Process + +@cindex killing a process +The @code{kill} function can be used to send a signal to another process. +In spite of its name, it can be used for a lot of things other than +causing a process to terminate. Some examples of situations where you +might want to send signals between processes are: + +@itemize @bullet +@item +A parent process starts a child to perform a task---perhaps having the +child running an infinite loop---and then terminates the child when the +task is no longer needed. + +@item +A process executes as part of a group, and needs to terminate or notify +the other processes in the group when an error or other event occurs. + +@item +Two processes need to synchronize while working together. +@end itemize + +This section assumes that you know a little bit about how processes +work. For more information on this subject, see @ref{Processes}. + +The @code{kill} function is declared in @file{signal.h}. +@pindex signal.h + +@comment signal.h +@comment POSIX.1 +@deftypefun int kill (pid_t @var{pid}, int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c The hurd implementation is not a critical section, so it's not +@c immediately obvious that, in case of cancellation, it won't leak +@c ports or the memory allocated by proc_getpgrppids when pid <= 0. +@c Since none of these make it AC-Unsafe, I'm leaving them out. +The @code{kill} function sends the signal @var{signum} to the process +or process group specified by @var{pid}. Besides the signals listed in +@ref{Standard Signals}, @var{signum} can also have a value of zero to +check the validity of the @var{pid}. + +The @var{pid} specifies the process or process group to receive the +signal: + +@table @code +@item @var{pid} > 0 +The process whose identifier is @var{pid}. 
+ +@item @var{pid} == 0 +All processes in the same process group as the sender. + +@item @var{pid} < -1 +The process group whose identifier is @minus{}@var{pid}. + +@item @var{pid} == -1 +If the process is privileged, send the signal to all processes except +for some special system processes. Otherwise, send the signal to all +processes with the same effective user ID. +@end table + +A process can send a signal to itself with a call like @w{@code{kill +(getpid(), @var{signum})}}. If @code{kill} is used by a process to send +a signal to itself, and the signal is not blocked, then @code{kill} +delivers at least one signal (which might be some other pending +unblocked signal instead of the signal @var{signum}) to that process +before it returns. + +The return value from @code{kill} is zero if the signal can be sent +successfully. Otherwise, no signal is sent, and a value of @code{-1} is +returned. If @var{pid} specifies sending a signal to several processes, +@code{kill} succeeds if it can send the signal to at least one of them. +There's no way you can tell which of the processes got the signal +or whether all of them did. + +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EINVAL +The @var{signum} argument is an invalid or unsupported number. + +@item EPERM +You do not have the privilege to send a signal to the process or any of +the processes in the process group named by @var{pid}. + +@item ESRCH +The @var{pid} argument does not refer to an existing process or group. +@end table +@end deftypefun + +@comment signal.h +@comment BSD +@deftypefun int killpg (int @var{pgid}, int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Calls kill with -pgid. +This is similar to @code{kill}, but sends signal @var{signum} to the +process group @var{pgid}. This function is provided for compatibility +with BSD; using @code{kill} to do this is more portable. 
+@end deftypefun + +As a simple example of @code{kill}, the call @w{@code{kill (getpid (), +@var{sig})}} has the same effect as @w{@code{raise (@var{sig})}}. + +@node Permission for kill +@subsection Permission for using @code{kill} + +There are restrictions that prevent you from using @code{kill} to send +signals to any random process. These are intended to prevent antisocial +behavior such as arbitrarily killing off processes belonging to another +user. In typical use, @code{kill} is used to pass signals between +parent, child, and sibling processes, and in these situations you +normally do have permission to send signals. The only common exception +is when you run a setuid program in a child process; if the program +changes its real UID as well as its effective UID, you may not have +permission to send a signal. The @code{su} program does this. + +Whether a process has permission to send a signal to another process +is determined by the user IDs of the two processes. This concept is +discussed in detail in @ref{Process Persona}. + +Generally, for a process to be able to send a signal to another process, +either the sending process must belong to a privileged user (like +@samp{root}), or the real or effective user ID of the sending process +must match the real or effective user ID of the receiving process. If +the receiving process has changed its effective user ID from the +set-user-ID mode bit on its process image file, then the owner of the +process image file is used in place of its current effective user ID. +In some implementations, a parent process might be able to send signals +to a child process even if the user ID's don't match, and other +implementations might enforce other restrictions. + +The @code{SIGCONT} signal is a special case. It can be sent if the +sender is part of the same session as the receiver, regardless of +user IDs. 
+ +@node Kill Example +@subsection Using @code{kill} for Communication +@cindex interprocess communication, with signals +Here is a longer example showing how signals can be used for +interprocess communication. This is what the @code{SIGUSR1} and +@code{SIGUSR2} signals are provided for. Since these signals are fatal +by default, the process that is supposed to receive them must trap them +through @code{signal} or @code{sigaction}. + +In this example, a parent process forks a child process and then waits +for the child to complete its initialization. The child process tells +the parent when it is ready by sending it a @code{SIGUSR1} signal, using +the @code{kill} function. + +@smallexample +@include sigusr.c.texi +@end smallexample + +This example uses a busy wait, which is bad, because it wastes CPU +cycles that other programs could otherwise use. It is better to ask the +system to wait until the signal arrives. See the example in +@ref{Waiting for a Signal}. + +@node Blocking Signals +@section Blocking Signals +@cindex blocking signals + +Blocking a signal means telling the operating system to hold it and +deliver it later. Generally, a program does not block signals +indefinitely---it might as well ignore them by setting their actions to +@code{SIG_IGN}. But it is useful to block signals briefly, to prevent +them from interrupting sensitive operations. For instance: + +@itemize @bullet +@item +You can use the @code{sigprocmask} function to block signals while you +modify global variables that are also modified by the handlers for these +signals. + +@item +You can set @code{sa_mask} in your @code{sigaction} call to block +certain signals while a particular signal handler runs. This way, the +signal handler can run without being interrupted itself by signals. +@end itemize + +@menu +* Why Block:: The purpose of blocking signals. +* Signal Sets:: How to specify which signals to + block. 
+* Process Signal Mask:: Blocking delivery of signals to your + process during normal execution. +* Testing for Delivery:: Blocking to Test for Delivery of + a Signal. +* Blocking for Handler:: Blocking additional signals while a + handler is being run. +* Checking for Pending Signals:: Checking for Pending Signals +* Remembering a Signal:: How you can get almost the same + effect as blocking a signal, by + handling it and setting a flag + to be tested later. +@end menu + +@node Why Block +@subsection Why Blocking Signals is Useful + +Temporary blocking of signals with @code{sigprocmask} gives you a way to +prevent interrupts during critical parts of your code. If signals +arrive in that part of the program, they are delivered later, after you +unblock them. + +One example where this is useful is for sharing data between a signal +handler and the rest of the program. If the type of the data is not +@code{sig_atomic_t} (@pxref{Atomic Data Access}), then the signal +handler could run when the rest of the program has only half finished +reading or writing the data. This would lead to confusing consequences. + +To make the program reliable, you can prevent the signal handler from +running while the rest of the program is examining or modifying that +data---by blocking the appropriate signal around the parts of the +program that touch the data. + +Blocking signals is also necessary when you want to perform a certain +action only if a signal has not arrived. Suppose that the handler for +the signal sets a flag of type @code{sig_atomic_t}; you would like to +test the flag and perform the action if the flag is not set. This is +unreliable. Suppose the signal is delivered immediately after you test +the flag, but before the consequent action: then the program will +perform the action even though the signal has arrived. + +The only way to test reliably for whether a signal has yet arrived is to +test while the signal is blocked. 
+ +@node Signal Sets +@subsection Signal Sets + +All of the signal blocking functions use a data structure called a +@dfn{signal set} to specify what signals are affected. Thus, every +activity involves two stages: creating the signal set, and then passing +it as an argument to a library function. +@cindex signal set + +These facilities are declared in the header file @file{signal.h}. +@pindex signal.h + +@comment signal.h +@comment POSIX.1 +@deftp {Data Type} sigset_t +The @code{sigset_t} data type is used to represent a signal set. +Internally, it may be implemented as either an integer or structure +type. + +For portability, use only the functions described in this section to +initialize, change, and retrieve information from @code{sigset_t} +objects---don't try to manipulate them directly. +@end deftp + +There are two ways to initialize a signal set. You can initially +specify it to be empty with @code{sigemptyset} and then add specified +signals individually. Or you can specify it to be full with +@code{sigfillset} and then delete specified signals individually. + +You must always initialize the signal set with one of these two +functions before using it in any other way. Don't try to set all the +signals explicitly because the @code{sigset_t} object might include some +other information (like a version field) that needs to be initialized as +well. (In addition, it's not wise to put into your program an +assumption that the system has no signals aside from the ones you know +about.) + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigemptyset (sigset_t *@var{set}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Just memsets all of set to zero. +This function initializes the signal set @var{set} to exclude all of the +defined signals. It always returns @code{0}. 
+@end deftypefun + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigfillset (sigset_t *@var{set}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function initializes the signal set @var{set} to include +all of the defined signals. Again, the return value is @code{0}. +@end deftypefun + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigaddset (sigset_t *@var{set}, int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function adds the signal @var{signum} to the signal set @var{set}. +All @code{sigaddset} does is modify @var{set}; it does not block or +unblock any signals. + +The return value is @code{0} on success and @code{-1} on failure. +The following @code{errno} error condition is defined for this function: + +@table @code +@item EINVAL +The @var{signum} argument doesn't specify a valid signal. +@end table +@end deftypefun + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigdelset (sigset_t *@var{set}, int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function removes the signal @var{signum} from the signal set +@var{set}. All @code{sigdelset} does is modify @var{set}; it does not +block or unblock any signals. The return value and error conditions are +the same as for @code{sigaddset}. +@end deftypefun + +Finally, there is a function to test what signals are in a signal set: + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigismember (const sigset_t *@var{set}, int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{sigismember} function tests whether the signal @var{signum} is +a member of the signal set @var{set}. It returns @code{1} if the signal +is in the set, @code{0} if not, and @code{-1} if there is an error. + +The following @code{errno} error condition is defined for this function: + +@table @code +@item EINVAL +The @var{signum} argument doesn't specify a valid signal. 
+@end table +@end deftypefun + +@node Process Signal Mask +@subsection Process Signal Mask +@cindex signal mask +@cindex process signal mask + +The collection of signals that are currently blocked is called the +@dfn{signal mask}. Each process has its own signal mask. When you +create a new process (@pxref{Creating a Process}), it inherits its +parent's mask. You can block or unblock signals with total flexibility +by modifying the signal mask. + +The prototype for the @code{sigprocmask} function is in @file{signal.h}. +@pindex signal.h + +Note that you must not use @code{sigprocmask} in multi-threaded processes, +because each thread has its own signal mask and there is no single process +signal mask. According to POSIX, the behavior of @code{sigprocmask} in a +multi-threaded process is ``unspecified''. +Instead, use @code{pthread_sigmask}. +@ifset linuxthreads +@xref{Threads and Signal Handling}. +@end ifset + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigprocmask (int @var{how}, const sigset_t *restrict @var{set}, sigset_t *restrict @var{oldset}) +@safety{@prelim{}@mtunsafe{@mtasurace{:sigprocmask/bsd(SIG_UNBLOCK)}}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c This takes the hurd_self_sigstate-returned object's lock on HURD. On +@c BSD, SIG_UNBLOCK is emulated with two sigblock calls, which +@c introduces a race window. +The @code{sigprocmask} function is used to examine or change the calling +process's signal mask. The @var{how} argument determines how the signal +mask is changed, and must be one of the following values: + +@vtable @code +@comment signal.h +@comment POSIX.1 +@item SIG_BLOCK +Block the signals in @var{set}---add them to the existing mask. In +other words, the new mask is the union of the existing mask and +@var{set}. + +@comment signal.h +@comment POSIX.1 +@item SIG_UNBLOCK +Unblock the signals in @var{set}---remove them from the existing mask. 
+ +@comment signal.h +@comment POSIX.1 +@item SIG_SETMASK +Use @var{set} for the mask; ignore the previous value of the mask. +@end vtable + +The last argument, @var{oldset}, is used to return information about the +old process signal mask. If you just want to change the mask without +looking at it, pass a null pointer as the @var{oldset} argument. +Similarly, if you want to know what's in the mask without changing it, +pass a null pointer for @var{set} (in this case the @var{how} argument +is not significant). The @var{oldset} argument is often used to +remember the previous signal mask in order to restore it later. (Since +the signal mask is inherited over @code{fork} and @code{exec} calls, you +can't predict what its contents are when your program starts running.) + +If invoking @code{sigprocmask} causes any pending signals to be +unblocked, at least one of those signals is delivered to the process +before @code{sigprocmask} returns. The order in which pending signals +are delivered is not specified, but you can control the order explicitly +by making multiple @code{sigprocmask} calls to unblock various signals +one at a time. + +The @code{sigprocmask} function returns @code{0} if successful, and @code{-1} +to indicate an error. The following @code{errno} error conditions are +defined for this function: + +@table @code +@item EINVAL +The @var{how} argument is invalid. +@end table + +You can't block the @code{SIGKILL} and @code{SIGSTOP} signals, but +if the signal set includes these, @code{sigprocmask} just ignores +them instead of returning an error status. + +Remember, too, that blocking program error signals such as @code{SIGFPE} +leads to undesirable results for signals generated by an actual program +error (as opposed to signals sent with @code{raise} or @code{kill}). +This is because your program may be too broken to be able to continue +executing to a point where the signal is unblocked again. +@xref{Program Error Signals}. 
+@end deftypefun + +@node Testing for Delivery +@subsection Blocking to Test for Delivery of a Signal + +Now for a simple example. Suppose you establish a handler for +@code{SIGALRM} signals that sets a flag whenever a signal arrives, and +your main program checks this flag from time to time and then resets it. +You can prevent additional @code{SIGALRM} signals from arriving in the +meantime by wrapping the critical part of the code with calls to +@code{sigprocmask}, like this: + +@smallexample +/* @r{This variable is set by the SIGALRM signal handler.} */ +volatile sig_atomic_t flag = 0; + +int +main (void) +@{ + sigset_t block_alarm; + + @dots{} + + /* @r{Initialize the signal mask.} */ + sigemptyset (&block_alarm); + sigaddset (&block_alarm, SIGALRM); + +@group + while (1) + @{ + /* @r{Check if a signal has arrived; if so, reset the flag.} */ + sigprocmask (SIG_BLOCK, &block_alarm, NULL); + if (flag) + @{ + @var{actions-if-arrived} + flag = 0; + @} + sigprocmask (SIG_UNBLOCK, &block_alarm, NULL); + + @dots{} + @} +@} +@end group +@end smallexample + +@node Blocking for Handler +@subsection Blocking Signals for a Handler +@cindex blocking signals, in a handler + +When a signal handler is invoked, you usually want it to be able to +finish without being interrupted by another signal. From the moment the +handler starts until the moment it finishes, you must block signals that +might confuse it or corrupt its data. + +When a handler function is invoked on a signal, that signal is +automatically blocked (in addition to any other signals that are already +in the process's signal mask) during the time the handler is running. +If you set up a handler for @code{SIGTSTP}, for instance, then the +arrival of that signal forces further @code{SIGTSTP} signals to wait +during the execution of the handler. + +However, by default, other kinds of signals are not blocked; they can +arrive during handler execution. 
+ +The reliable way to block other kinds of signals during the execution of +the handler is to use the @code{sa_mask} member of the @code{sigaction} +structure. + +Here is an example: + +@smallexample +#include <signal.h> +#include <stddef.h> + +void catch_stop (); + +void +install_handler (void) +@{ + struct sigaction setup_action; + sigset_t block_mask; + + sigemptyset (&block_mask); + /* @r{Block other terminal-generated signals while handler runs.} */ + sigaddset (&block_mask, SIGINT); + sigaddset (&block_mask, SIGQUIT); + setup_action.sa_handler = catch_stop; + setup_action.sa_mask = block_mask; + setup_action.sa_flags = 0; + sigaction (SIGTSTP, &setup_action, NULL); +@} +@end smallexample + +This is more reliable than blocking the other signals explicitly in the +code for the handler. If you block signals explicitly in the handler, +you can't avoid at least a short interval at the beginning of the +handler where they are not yet blocked. + +You cannot remove signals from the process's current mask using this +mechanism. However, you can make calls to @code{sigprocmask} within +your handler to block or unblock signals as you wish. + +In any case, when the handler returns, the system restores the mask that +was in place before the handler was entered. If any signals that become +unblocked by this restoration are pending, the process will receive +those signals immediately, before returning to the code that was +interrupted. + +@node Checking for Pending Signals +@subsection Checking for Pending Signals +@cindex pending signals, checking for +@cindex blocked signals, checking for +@cindex checking for pending signals + +You can find out which signals are pending at any time by calling +@code{sigpending}. This function is declared in @file{signal.h}. 
+@pindex signal.h + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigpending (sigset_t *@var{set}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c Direct rt_sigpending syscall on most systems. On hurd, calls +@c hurd_self_sigstate, it copies the sigstate's pending while holding +@c its lock. +The @code{sigpending} function stores information about pending signals +in @var{set}. If there is a pending signal that is blocked from +delivery, then that signal is a member of the returned set. (You can +test whether a particular signal is a member of this set using +@code{sigismember}; see @ref{Signal Sets}.) + +The return value is @code{0} if successful, and @code{-1} on failure. +@end deftypefun + +Testing whether a signal is pending is not often useful. Testing when +that signal is not blocked is almost certainly bad design. + +Here is an example. + +@smallexample +#include <signal.h> +#include <stddef.h> + +sigset_t base_mask, waiting_mask; + +sigemptyset (&base_mask); +sigaddset (&base_mask, SIGINT); +sigaddset (&base_mask, SIGTSTP); + +/* @r{Block user interrupts while doing other processing.} */ +sigprocmask (SIG_SETMASK, &base_mask, NULL); +@dots{} + +/* @r{After a while, check to see whether any signals are pending.} */ +sigpending (&waiting_mask); +if (sigismember (&waiting_mask, SIGINT)) @{ + /* @r{User has tried to kill the process.} */ +@} +else if (sigismember (&waiting_mask, SIGTSTP)) @{ + /* @r{User has tried to stop the process.} */ +@} +@end smallexample + +Remember that if there is a particular signal pending for your process, +additional signals of that same type that arrive in the meantime might +be discarded. For example, if a @code{SIGINT} signal is pending when +another @code{SIGINT} signal arrives, your program will probably only +see one of them when you unblock this signal. + +@strong{Portability Note:} The @code{sigpending} function is new in +POSIX.1. Older systems have no equivalent facility. 
+ +@node Remembering a Signal +@subsection Remembering a Signal to Act On Later + +Instead of blocking a signal using the library facilities, you can get +almost the same results by making the handler set a flag to be tested +later, when you ``unblock''. Here is an example: + +@smallexample +/* @r{This is nonzero if a signal arrived and was not handled.} */ +volatile sig_atomic_t signal_pending; + +/* @r{If this flag is nonzero, don't handle the signal right away.} */ +volatile sig_atomic_t defer_signal; + +void +handler (int signum) +@{ + if (defer_signal) + signal_pending = signum; + else + @dots{} /* @r{``Really'' handle the signal.} */ +@} + +@dots{} + +void +update_mumble (int frob) +@{ + /* @r{Prevent signals from having immediate effect.} */ + defer_signal++; + /* @r{Now update @code{mumble}, without worrying about interruption.} */ + mumble.a = 1; + mumble.b = hack (); + mumble.c = frob; + /* @r{We have updated @code{mumble}. Handle any signal that came in.} */ + defer_signal--; + if (defer_signal == 0 && signal_pending != 0) + raise (signal_pending); +@} +@end smallexample + +Note how the particular signal that arrives is stored in +@code{signal_pending}. That way, we can handle several types of +inconvenient signals with the same mechanism. + +We increment and decrement @code{defer_signal} so that nested critical +sections will work properly; thus, if @code{update_mumble} were called +with @code{defer_signal} already nonzero, signals would be deferred +not only within @code{update_mumble}, but also within the caller. This +is also why we do not check @code{signal_pending} if @code{defer_signal} +is still nonzero. + +The incrementing and decrementing of @code{defer_signal} each require more +than one instruction; it is possible for a signal to happen in the +middle. But that does not cause any problem. 
If the signal happens +early enough to see the value from before the increment or decrement, +that is equivalent to a signal which came before the beginning of the +increment or decrement, which is a case that works properly. + +It is absolutely vital to decrement @code{defer_signal} before testing +@code{signal_pending}, because this avoids a subtle bug. If we did +these things in the other order, like this, + +@smallexample + if (defer_signal == 1 && signal_pending != 0) + raise (signal_pending); + defer_signal--; +@end smallexample + +@noindent +then a signal arriving in between the @code{if} statement and the decrement +would be effectively ``lost'' for an indefinite amount of time. The +handler would merely set @code{signal_pending}, but the program having +already tested this variable, it would not test the variable again. + +@cindex timing error in signal handling +Bugs like these are called @dfn{timing errors}. They are especially bad +because they happen only rarely and are nearly impossible to reproduce. +You can't expect to find them with a debugger as you would find a +reproducible bug. So it is worth being especially careful to avoid +them. + +(You would not be tempted to write the code in this order, given the use +of @code{defer_signal} as a counter which must be tested along with +@code{signal_pending}. After all, testing for zero is cleaner than +testing for one. But if you did not use @code{defer_signal} as a +counter, and gave it values of zero and one only, then either order +might seem equally simple. This is a further advantage of using a +counter for @code{defer_signal}: it will reduce the chance you will +write the code in the wrong order and create a subtle bug.) 
+ +@node Waiting for a Signal +@section Waiting for a Signal +@cindex waiting for a signal +@cindex @code{pause} function + +If your program is driven by external events, or uses signals for +synchronization, then when it has nothing to do it should probably wait +until a signal arrives. + +@menu +* Using Pause:: The simple way, using @code{pause}. +* Pause Problems:: Why the simple way is often not very good. +* Sigsuspend:: Reliably waiting for a specific signal. +@end menu + +@node Using Pause +@subsection Using @code{pause} + +The simple way to wait until a signal arrives is to call @code{pause}. +Please read about its disadvantages, in the following section, before +you use it. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int pause (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:sigprocmask/!bsd!linux}}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c The signal mask read by sigprocmask may be overridden by another +@c thread or by a signal handler before we call sigsuspend. Is this a +@c safety issue? Probably not. +@c pause @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +@c [ports/linux/generic] +@c syscall_pause ok +@c [posix] +@c sigemptyset dup ok +@c sigprocmask(SIG_BLOCK) dup @asulock/hurd @aculock/hurd [no @mtasurace:sigprocmask/bsd(SIG_UNBLOCK)] +@c sigsuspend dup @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +The @code{pause} function suspends program execution until a signal +arrives whose action is either to execute a handler function, or to +terminate the process. + +If the signal causes a handler function to be executed, then +@code{pause} returns. This is considered an unsuccessful return (since +``successful'' behavior would be to suspend the program forever), so the +return value is @code{-1}. Even if you specify that other primitives +should resume when a signal handler returns (@pxref{Interrupted +Primitives}), this has no effect on @code{pause}; it always fails when a +signal is handled. 
+ +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EINTR +The function was interrupted by delivery of a signal. +@end table + +If the signal causes program termination, @code{pause} doesn't return +(obviously). + +This function is a cancellation point in multithreaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{pause} is +called. If the thread gets cancelled, these resources stay allocated +until the program ends. To avoid this, calls to @code{pause} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The @code{pause} function is declared in @file{unistd.h}. +@end deftypefun + +@node Pause Problems +@subsection Problems with @code{pause} + +The simplicity of @code{pause} can conceal serious timing errors that +can make a program hang mysteriously. + +It is safe to use @code{pause} if the real work of your program is done +by the signal handlers themselves, and the ``main program'' does nothing +but call @code{pause}. Each time a signal is delivered, the handler +will do the next batch of work that is to be done, and then return, so +that the main loop of the program can call @code{pause} again. + +You can't safely use @code{pause} to wait until one more signal arrives, +and then resume real work. Even if you arrange for the signal handler +to cooperate by setting a flag, you still can't use @code{pause} +reliably. Here is an example of this problem: + +@smallexample +/* @r{@code{usr_interrupt} is set by the signal handler.} */ +if (!usr_interrupt) + pause (); + +/* @r{Do work once the signal arrives.} */ +@dots{} +@end smallexample + +@noindent +This has a bug: the signal could arrive after the variable +@code{usr_interrupt} is checked, but before the call to @code{pause}. +If no further signals arrive, the process would never wake up again. 
+ +You can put an upper limit on the excess waiting by using @code{sleep} +in a loop, instead of using @code{pause}. (@xref{Sleeping}, for more +about @code{sleep}.) Here is what this looks like: + +@smallexample +/* @r{@code{usr_interrupt} is set by the signal handler.} */ +while (!usr_interrupt) + sleep (1); + +/* @r{Do work once the signal arrives.} */ +@dots{} +@end smallexample + +For some purposes, that is good enough. But with a little more +complexity, you can wait reliably until a particular signal handler is +run, using @code{sigsuspend}. +@ifinfo +@xref{Sigsuspend}. +@end ifinfo + +@node Sigsuspend +@subsection Using @code{sigsuspend} + +The clean and reliable way to wait for a signal to arrive is to block it +and then use @code{sigsuspend}. By using @code{sigsuspend} in a loop, +you can wait for certain kinds of signals, while letting other kinds of +signals be handled by their handlers. + +@comment signal.h +@comment POSIX.1 +@deftypefun int sigsuspend (const sigset_t *@var{set}) +@safety{@prelim{}@mtunsafe{@mtasurace{:sigprocmask/!bsd!linux}}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c sigsuspend @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +@c [posix] @mtasurace:sigprocmask/!bsd!linux +@c saving and restoring the procmask is racy +@c sigprocmask(SIG_SETMASK) dup @asulock/hurd @aculock/hurd [no @mtasurace:sigprocmask/bsd(SIG_UNBLOCK)] +@c pause @asulock/hurd @aculock/hurd +@c [bsd] +@c sigismember dup ok +@c sigmask dup ok +@c sigpause dup ok [no @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd] +@c [linux] +@c do_sigsuspend ok +This function replaces the process's signal mask with @var{set} and then +suspends the process until a signal is delivered whose action is either +to terminate the process or invoke a signal handling function. In other +words, the program is effectively suspended until one of the signals that +is not a member of @var{set} arrives. 
+ +If the process is woken up by delivery of a signal that invokes a handler +function, and the handler function returns, then @code{sigsuspend} also +returns. + +The mask remains @var{set} only as long as @code{sigsuspend} is waiting. +The function @code{sigsuspend} always restores the previous signal mask +when it returns. + +The return value and error conditions are the same as for @code{pause}. +@end deftypefun + +With @code{sigsuspend}, you can replace the @code{pause} or @code{sleep} +loop in the previous section with something completely reliable: + +@smallexample +sigset_t mask, oldmask; + +@dots{} + +/* @r{Set up the mask of signals to temporarily block.} */ +sigemptyset (&mask); +sigaddset (&mask, SIGUSR1); + +@dots{} + +/* @r{Wait for a signal to arrive.} */ +sigprocmask (SIG_BLOCK, &mask, &oldmask); +while (!usr_interrupt) + sigsuspend (&oldmask); +sigprocmask (SIG_UNBLOCK, &mask, NULL); +@end smallexample + +This last piece of code is a little tricky. The key point to remember +here is that when @code{sigsuspend} returns, it resets the process's +signal mask to the original value, the value from before the call to +@code{sigsuspend}---in this case, the @code{SIGUSR1} signal is once +again blocked. The second call to @code{sigprocmask} is +necessary to explicitly unblock this signal. + +One other point: you may be wondering why the @code{while} loop is +necessary at all, since the program is apparently only waiting for one +@code{SIGUSR1} signal. The answer is that the mask passed to +@code{sigsuspend} permits the process to be woken up by the delivery of +other kinds of signals, as well---for example, job control signals. If +the process is woken up by a signal that doesn't set +@code{usr_interrupt}, it just suspends itself again until the ``right'' +kind of signal eventually arrives. + +This technique takes a few more lines of preparation, but that is needed +just once for each kind of wait criterion you want to use. 
The code +that actually waits is just four lines. + +@node Signal Stack +@section Using a Separate Signal Stack + +A signal stack is a special area of memory to be used as the execution +stack during signal handlers. It should be fairly large, to avoid any +danger that it will overflow in turn; the macro @code{SIGSTKSZ} is +defined to a canonical size for signal stacks. You can use +@code{malloc} to allocate the space for the stack. Then call +@code{sigaltstack} or @code{sigstack} to tell the system to use that +space for the signal stack. + +You don't need to write signal handlers differently in order to use a +signal stack. Switching from one stack to the other happens +automatically. (Some non-GNU debuggers on some machines may get +confused if you examine a stack trace while a handler that uses the +signal stack is running.) + +There are two interfaces for telling the system to use a separate signal +stack. @code{sigstack} is the older interface, which comes from 4.2 +BSD. @code{sigaltstack} is the newer interface, and comes from 4.4 +BSD. The @code{sigaltstack} interface has the advantage that it does +not require your program to know which direction the stack grows, which +depends on the specific machine and operating system. + +@comment signal.h +@comment XPG +@deftp {Data Type} stack_t +This structure describes a signal stack. It contains the following members: + +@table @code +@item void *ss_sp +This points to the base of the signal stack. + +@item size_t ss_size +This is the size (in bytes) of the signal stack which @samp{ss_sp} points to. +You should set this to however much space you allocated for the stack. + +There are two macros defined in @file{signal.h} that you should use in +calculating this size: + +@vtable @code +@item SIGSTKSZ +This is the canonical size for a signal stack. It is judged to be +sufficient for normal uses. 
+ +@item MINSIGSTKSZ +This is the amount of signal stack space the operating system needs just +to implement signal delivery. The size of a signal stack @strong{must} +be greater than this. + +For most cases, just using @code{SIGSTKSZ} for @code{ss_size} is +sufficient. But if you know how much stack space your program's signal +handlers will need, you may want to use a different size. In this case, +you should allocate @code{MINSIGSTKSZ} additional bytes for the signal +stack and increase @code{ss_size} accordingly. +@end vtable + +@item int ss_flags +This field contains the bitwise @sc{or} of these flags: + +@vtable @code +@item SS_DISABLE +This tells the system that it should not use the signal stack. + +@item SS_ONSTACK +This is set by the system, and indicates that the signal stack is +currently in use. If this bit is not set, then signals will be +delivered on the normal user stack. +@end vtable +@end table +@end deftp + +@comment signal.h +@comment XPG +@deftypefun int sigaltstack (const stack_t *restrict @var{stack}, stack_t *restrict @var{oldstack}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c Syscall on Linux and BSD; the HURD implementation takes a lock on +@c the hurd_self_sigstate-returned struct. +The @code{sigaltstack} function specifies an alternate stack for use +during signal handling. When a signal is received by the process and +its action indicates that the signal stack is used, the system arranges +a switch to the currently installed signal stack while the handler for +that signal is executed. + +If @var{oldstack} is not a null pointer, information about the currently +installed signal stack is returned in the location it points to. If +@var{stack} is not a null pointer, then this is installed as the new +stack for use by signal handlers. + +The return value is @code{0} on success and @code{-1} on failure. 
If +@code{sigaltstack} fails, it sets @code{errno} to one of these values: + +@table @code +@item EINVAL +You tried to disable a stack that was in fact currently in use. + +@item ENOMEM +The size of the alternate stack was too small. +It must be greater than @code{MINSIGSTKSZ}. +@end table +@end deftypefun + +Here is the older @code{sigstack} interface. You should use +@code{sigaltstack} instead on systems that have it. + +@comment signal.h +@comment BSD +@deftp {Data Type} {struct sigstack} +This structure describes a signal stack. It contains the following members: + +@table @code +@item void *ss_sp +This is the stack pointer. If the stack grows downwards on your +machine, this should point to the top of the area you allocated. If the +stack grows upwards, it should point to the bottom. + +@item int ss_onstack +This field is true if the process is currently using this stack. +@end table +@end deftp + +@comment signal.h +@comment BSD +@deftypefun int sigstack (struct sigstack *@var{stack}, struct sigstack *@var{oldstack}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c Lossy and dangerous (no size limit) wrapper for sigaltstack. +The @code{sigstack} function specifies an alternate stack for use during +signal handling. When a signal is received by the process and its +action indicates that the signal stack is used, the system arranges a +switch to the currently installed signal stack while the handler for +that signal is executed. + +If @var{oldstack} is not a null pointer, information about the currently +installed signal stack is returned in the location it points to. If +@var{stack} is not a null pointer, then this is installed as the new +stack for use by signal handlers. + +The return value is @code{0} on success and @code{-1} on failure. +@end deftypefun + +@node BSD Signal Handling +@section BSD Signal Handling + +This section describes alternative signal handling functions derived +from BSD Unix. 
These facilities were an advance, in their time; today, +they are mostly obsolete, and supported mainly for compatibility with +BSD Unix. + +There are many similarities between the BSD and POSIX signal handling +facilities, because the POSIX facilities were inspired by the BSD +facilities. Besides having different names for all the functions to +avoid conflicts, the main difference between the two is that BSD Unix +represents signal masks as an @code{int} bit mask, rather than as a +@code{sigset_t} object. + +The BSD facilities are declared in @file{signal.h}. +@pindex signal.h + +@comment signal.h +@comment XPG +@deftypefun int siginterrupt (int @var{signum}, int @var{failflag}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtssigintr{}}}@asunsafe{}@acunsafe{@acucorrupt{}}} +@c This calls sigaction twice, once to get the current sigaction for the +@c specified signal, another to apply the flags change. This could +@c override the effects of a concurrent sigaction call. It also +@c modifies without any guards the global _sigintr variable, that +@c bsd_signal reads from, and it may leave _sigintr modified without +@c overriding the active handler if cancelled between the two +@c operations. +This function specifies which approach to use when certain primitives +are interrupted by handling signal @var{signum}. If @var{failflag} is +false, signal @var{signum} restarts primitives. If @var{failflag} is +true, handling @var{signum} causes these primitives to fail with error +code @code{EINTR}. @xref{Interrupted Primitives}. +@end deftypefun + +@comment signal.h +@comment BSD +@deftypefn Macro int sigmask (int @var{signum}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c This just shifts signum. +This macro returns a signal mask that has the bit for signal @var{signum} +set. You can bitwise-OR the results of several calls to @code{sigmask} +together to specify more than one signal. 
For example, + +@smallexample +(sigmask (SIGTSTP) | sigmask (SIGSTOP) + | sigmask (SIGTTIN) | sigmask (SIGTTOU)) +@end smallexample + +@noindent +specifies a mask that includes all the job-control stop signals. +@end deftypefn + +@comment signal.h +@comment BSD +@deftypefun int sigblock (int @var{mask}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c On most POSIX systems, this is a wrapper for sigprocmask(SIG_BLOCK). +@c The exception are BSD systems other than 4.4, where it is a syscall. +@c sigblock @asulock/hurd @aculock/hurd +@c sigprocmask(SIG_BLOCK) dup @asulock/hurd @aculock/hurd [no @mtasurace:sigprocmask/bsd(SIG_UNBLOCK)] +This function is equivalent to @code{sigprocmask} (@pxref{Process Signal +Mask}) with a @var{how} argument of @code{SIG_BLOCK}: it adds the +signals specified by @var{mask} to the calling process's set of blocked +signals. The return value is the previous set of blocked signals. +@end deftypefun + +@comment signal.h +@comment BSD +@deftypefun int sigsetmask (int @var{mask}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c On most POSIX systems, this is a wrapper for sigprocmask(SIG_SETMASK). +@c The exception are BSD systems other than 4.4, where it is a syscall. +@c sigsetmask @asulock/hurd @aculock/hurd +@c sigprocmask(SIG_SETMASK) dup @asulock/hurd @aculock/hurd [no @mtasurace:sigprocmask/bsd(SIG_UNBLOCK)] +This function is equivalent to @code{sigprocmask} (@pxref{Process +Signal Mask}) with a @var{how} argument of @code{SIG_SETMASK}: it sets +the calling process's signal mask to @var{mask}. The return value is +the previous set of blocked signals. 
+@end deftypefun + +@comment signal.h +@comment BSD +@deftypefun int sigpause (int @var{mask}) +@safety{@prelim{}@mtunsafe{@mtasurace{:sigprocmask/!bsd!linux}}@asunsafe{@asulock{/hurd}}@acunsafe{@aculock{/hurd}}} +@c sigpause @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +@c [posix] +@c __sigpause @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +@c do_sigpause @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +@c sigprocmask(0) dup @asulock/hurd @aculock/hurd [no @mtasurace:sigprocmask/bsd(SIG_UNBLOCK)] +@c sigdelset dup ok +@c sigset_set_old_mask dup ok +@c sigsuspend dup @mtasurace:sigprocmask/!bsd!linux @asulock/hurd @aculock/hurd +This function is the equivalent of @code{sigsuspend} (@pxref{Waiting +for a Signal}): it sets the calling process's signal mask to @var{mask}, +and waits for a signal to arrive. On return the previous set of blocked +signals is restored. +@end deftypefun diff --git a/REORG.TODO/manual/socket.texi b/REORG.TODO/manual/socket.texi new file mode 100644 index 0000000000..21b672badc --- /dev/null +++ b/REORG.TODO/manual/socket.texi @@ -0,0 +1,3761 @@ +@node Sockets, Low-Level Terminal Interface, Pipes and FIFOs, Top +@c %MENU% A more complicated IPC mechanism, with networking support +@chapter Sockets + +This chapter describes the GNU facilities for interprocess +communication using sockets. + +@cindex socket +@cindex interprocess communication, with sockets +A @dfn{socket} is a generalized interprocess communication channel. +Like a pipe, a socket is represented as a file descriptor. Unlike pipes, +sockets support communication between unrelated processes, and even +between processes running on different machines that communicate over a +network. Sockets are the primary means of communicating with other +machines; @code{telnet}, @code{rlogin}, @code{ftp}, @code{talk} and the +other familiar network programs use sockets. + +Not all operating systems support sockets. 
In @theglibc{}, the +header file @file{sys/socket.h} exists regardless of the operating +system, and the socket functions always exist, but if the system does +not really support sockets these functions always fail. + +@strong{Incomplete:} We do not currently document the facilities for +broadcast messages or for configuring Internet interfaces. The +reentrant functions and some newer functions that are related to IPv6 +aren't documented either so far. + +@menu +* Socket Concepts:: Basic concepts you need to know about. +* Communication Styles::Stream communication, datagrams and other styles. +* Socket Addresses:: How socket names (``addresses'') work. +* Interface Naming:: Identifying specific network interfaces. +* Local Namespace:: Details about the local namespace. +* Internet Namespace:: Details about the Internet namespace. +* Misc Namespaces:: Other namespaces not documented fully here. +* Open/Close Sockets:: Creating sockets and destroying them. +* Connections:: Operations on sockets with connection state. +* Datagrams:: Operations on datagram sockets. +* Inetd:: Inetd is a daemon that starts servers on request. + The most convenient way to write a server + is to make it work with Inetd. +* Socket Options:: Miscellaneous low-level socket options. +* Networks Database:: Accessing the database of network names. +@end menu + +@node Socket Concepts +@section Socket Concepts + +@cindex communication style (of a socket) +@cindex style of communication (of a socket) +When you create a socket, you must specify the style of communication +you want to use and the type of protocol that should implement it. +The @dfn{communication style} of a socket defines the user-level +semantics of sending and receiving data on the socket. 
Choosing a +communication style specifies the answers to questions such as these: + +@itemize @bullet +@item +@cindex packet +@cindex byte stream +@cindex stream (sockets) +@strong{What are the units of data transmission?} Some communication +styles regard the data as a sequence of bytes with no larger +structure; others group the bytes into records (which are known in +this context as @dfn{packets}). + +@item +@cindex loss of data on sockets +@cindex data loss on sockets +@strong{Can data be lost during normal operation?} Some communication +styles guarantee that all the data sent arrives in the order it was +sent (barring system or network crashes); other styles occasionally +lose data as a normal part of operation, and may sometimes deliver +packets more than once or in the wrong order. + +Designing a program to use unreliable communication styles usually +involves taking precautions to detect lost or misordered packets and +to retransmit data as needed. + +@item +@strong{Is communication entirely with one partner?} Some +communication styles are like a telephone call---you make a +@dfn{connection} with one remote socket and then exchange data +freely. Other styles are like mailing letters---you specify a +destination address for each message you send. +@end itemize + +@cindex namespace (of socket) +@cindex domain (of socket) +@cindex socket namespace +@cindex socket domain +You must also choose a @dfn{namespace} for naming the socket. A socket +name (``address'') is meaningful only in the context of a particular +namespace. In fact, even the data type to use for a socket name may +depend on the namespace. Namespaces are also called ``domains'', but we +avoid that word as it can be confused with other usage of the same +term. Each namespace has a symbolic name that starts with @samp{PF_}. +A corresponding symbolic name starting with @samp{AF_} designates the +address format for that namespace. 
+ +@cindex network protocol +@cindex protocol (of socket) +@cindex socket protocol +@cindex protocol family +Finally, you must choose the @dfn{protocol} to carry out the +communication. The protocol determines what low-level mechanism is used +to transmit and receive data. Each protocol is valid for a particular +namespace and communication style; a namespace is sometimes called a +@dfn{protocol family} because of this, which is why the namespace names +start with @samp{PF_}. + +The rules of a protocol apply to the data passing between two programs, +perhaps on different computers; most of these rules are handled by the +operating system and you need not know about them. What you do need to +know about protocols is this: + +@itemize @bullet +@item +In order to have communication between two sockets, they must specify +the @emph{same} protocol. + +@item +Each protocol is meaningful with particular style/namespace +combinations and cannot be used with inappropriate combinations. For +example, the TCP protocol fits only the byte stream style of +communication and the Internet namespace. + +@item +For each combination of style and namespace there is a @dfn{default +protocol}, which you can request by specifying 0 as the protocol +number. And that's what you should normally do---use the default. +@end itemize + +Throughout the following description at various places +variables/parameters to denote sizes are required. And here the trouble +starts. In the first implementations the type of these variables was +simply @code{int}. On most machines at that time an @code{int} was 32 +bits wide, which created a @emph{de facto} standard requiring 32-bit +variables. This is important since references to variables of this type +are passed to the kernel. + +Then the POSIX people came and unified the interface with the words ``all +size values are of type @code{size_t}''. On 64-bit machines +@code{size_t} is 64 bits wide, so passing pointers to the existing 32-bit +variables was no longer possible. 
+ +The Unix98 specification provides a solution by introducing a type +@code{socklen_t}. This type is used in all of the cases that POSIX +changed to use @code{size_t}. The only requirement of this type is that +it be an unsigned type of at least 32 bits. Therefore, implementations +which require that references to 32-bit variables be passed can be as +happy as implementations which use 64-bit values. + + +@node Communication Styles +@section Communication Styles + +@Theglibc{} includes support for several different kinds of sockets, +each with different characteristics. This section describes the +supported socket types. The symbolic constants listed here are +defined in @file{sys/socket.h}. +@pindex sys/socket.h + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int SOCK_STREAM +The @code{SOCK_STREAM} style is like a pipe (@pxref{Pipes and FIFOs}). +It operates over a connection with a particular remote socket and +transmits data reliably as a stream of bytes. + +Use of this style is covered in detail in @ref{Connections}. +@end deftypevr + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int SOCK_DGRAM +The @code{SOCK_DGRAM} style is used for sending +individually-addressed packets unreliably. +It is the diametrical opposite of @code{SOCK_STREAM}. + +Each time you write data to a socket of this kind, that data becomes +one packet. Since @code{SOCK_DGRAM} sockets do not have connections, +you must specify the recipient address with each packet. + +The only guarantee that the system makes about your requests to +transmit data is that it will try its best to deliver each packet you +send. It may succeed with the sixth packet after failing with the +fourth and fifth packets; the seventh packet may arrive before the +sixth, and may arrive a second time after the sixth. + +The typical use for @code{SOCK_DGRAM} is in situations where it is +acceptable to simply re-send a packet if no response is seen in a +reasonable amount of time. 
+ +@xref{Datagrams}, for detailed information about how to use datagram +sockets. +@end deftypevr + +@ignore +@c This appears to be only for the NS domain, which we aren't +@c discussing and probably won't support either. +@comment sys/socket.h +@comment BSD +@deftypevr Macro int SOCK_SEQPACKET +This style is like @code{SOCK_STREAM} except that the data are +structured into packets. + +A program that receives data over a @code{SOCK_SEQPACKET} socket +should be prepared to read the entire message packet in a single call +to @code{read}; if it only reads part of the message, the remainder of +the message is simply discarded instead of being available for +subsequent calls to @code{read}. + +Many protocols do not support this communication style. +@end deftypevr +@end ignore + +@ignore +@comment sys/socket.h +@comment BSD +@deftypevr Macro int SOCK_RDM +This style is a reliable version of @code{SOCK_DGRAM}: it sends +individually addressed packets, but guarantees that each packet sent +arrives exactly once. + +@strong{Warning:} It is not clear this is actually supported +by any operating system. +@end deftypevr +@end ignore + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int SOCK_RAW +This style provides access to low-level network protocols and +interfaces. Ordinary user programs usually have no need to use this +style. +@end deftypevr + +@node Socket Addresses +@section Socket Addresses + +@cindex address of socket +@cindex name of socket +@cindex binding a socket address +@cindex socket address (name) binding +The name of a socket is normally called an @dfn{address}. The +functions and symbols for dealing with socket addresses were named +inconsistently, sometimes using the term ``name'' and sometimes using +``address''. You can regard these terms as synonymous where sockets +are concerned. + +A socket newly created with the @code{socket} function has no +address. Other processes can find it for communication only if you +give it an address. 
We call this @dfn{binding} the address to the +socket, and the way to do it is with the @code{bind} function. + +You need only be concerned with the address of a socket if other processes +are to find it and start communicating with it. You can specify an +address for other sockets, but this is usually pointless; the first time +you send data from a socket, or use it to initiate a connection, the +system assigns an address automatically if you have not specified one. + +Occasionally a client needs to specify an address because the server +discriminates based on address; for example, the rsh and rlogin +protocols look at the client's socket address and only bypass password +checking if it is less than @code{IPPORT_RESERVED} (@pxref{Ports}). + +The details of socket addresses vary depending on what namespace you are +using. @xref{Local Namespace}, or @ref{Internet Namespace}, for specific +information. + +Regardless of the namespace, you use the same functions @code{bind} and +@code{getsockname} to set and examine a socket's address. These +functions use a phony data type, @code{struct sockaddr *}, to accept the +address. In practice, the address lives in a structure of some other +data type appropriate to the address format you are using, but you cast +its address to @code{struct sockaddr *} when you pass it to +@code{bind}. + +@menu +* Address Formats:: About @code{struct sockaddr}. +* Setting Address:: Binding an address to a socket. +* Reading Address:: Reading the address of a socket. +@end menu + +@node Address Formats +@subsection Address Formats + +The functions @code{bind} and @code{getsockname} use the generic data +type @code{struct sockaddr *} to represent a pointer to a socket +address. You can't use this data type effectively to interpret an +address or construct one; for that, you must use the proper data type +for the socket's namespace. 
+ +Thus, the usual practice is to construct an address of the proper +namespace-specific type, then cast a pointer to @code{struct sockaddr *} +when you call @code{bind} or @code{getsockname}. + +The one piece of information that you can get from the @code{struct +sockaddr} data type is the @dfn{address format designator}. This tells +you which data type to use to understand the address fully. + +@pindex sys/socket.h +The symbols in this section are defined in the header file +@file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftp {Data Type} {struct sockaddr} +The @code{struct sockaddr} type itself has the following members: + +@table @code +@item short int sa_family +This is the code for the address format of this address. It +identifies the format of the data which follows. + +@item char sa_data[14] +This is the actual socket address data, which is format-dependent. Its +length also depends on the format, and may well be more than 14. The +length 14 of @code{sa_data} is essentially arbitrary. +@end table +@end deftp + +Each address format has a symbolic name which starts with @samp{AF_}. +Each of them corresponds to a @samp{PF_} symbol which designates the +corresponding namespace. Here is a list of address format names: + +@vtable @code +@comment sys/socket.h +@comment POSIX +@item AF_LOCAL +This designates the address format that goes with the local namespace. +(@code{PF_LOCAL} is the name of that namespace.) @xref{Local Namespace +Details}, for information about this address format. + +@comment sys/socket.h +@comment BSD, Unix98 +@item AF_UNIX +This is a synonym for @code{AF_LOCAL}. Although @code{AF_LOCAL} is +mandated by POSIX.1g, @code{AF_UNIX} is portable to more systems. +@code{AF_UNIX} was the traditional name stemming from BSD, so even most +POSIX systems support it. It is also the name of choice in the Unix98 +specification. (The same is true for @code{PF_UNIX} +vs. @code{PF_LOCAL}). 
+ +@comment sys/socket.h +@comment GNU +@item AF_FILE +This is another synonym for @code{AF_LOCAL}, for compatibility. +(@code{PF_FILE} is likewise a synonym for @code{PF_LOCAL}.) + +@comment sys/socket.h +@comment BSD +@item AF_INET +This designates the address format that goes with the Internet +namespace. (@code{PF_INET} is the name of that namespace.) +@xref{Internet Address Formats}. + +@comment sys/socket.h +@comment IPv6 Basic API +@item AF_INET6 +This is similar to @code{AF_INET}, but refers to the IPv6 protocol. +(@code{PF_INET6} is the name of the corresponding namespace.) + +@comment sys/socket.h +@comment BSD +@item AF_UNSPEC +This designates no particular address format. It is used only in rare +cases, such as to clear out the default destination address of a +``connected'' datagram socket. @xref{Sending Datagrams}. + +The corresponding namespace designator symbol @code{PF_UNSPEC} exists +for completeness, but there is no reason to use it in a program. +@end vtable + +@file{sys/socket.h} defines symbols starting with @samp{AF_} for many +different kinds of networks, most or all of which are not actually +implemented. We will document those that really work as we receive +information about how to use them. + +@node Setting Address +@subsection Setting the Address of a Socket + +@pindex sys/socket.h +Use the @code{bind} function to assign an address to a socket. The +prototype for @code{bind} is in the header file @file{sys/socket.h}. +For examples of use, see @ref{Local Socket Example}, or see @ref{Inet Example}. + +@comment sys/socket.h +@comment BSD +@deftypefun int bind (int @var{socket}, struct sockaddr *@var{addr}, socklen_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, except on Hurd. +The @code{bind} function assigns an address to the socket +@var{socket}. The @var{addr} and @var{length} arguments specify the +address; the detailed format of the address depends on the namespace. 
+The first part of the address is always the format designator, which +specifies a namespace, and says that the address is in the format of +that namespace. + +The return value is @code{0} on success and @code{-1} on failure. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{socket} argument is not a valid file descriptor. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. + +@item EADDRNOTAVAIL +The specified address is not available on this machine. + +@item EADDRINUSE +Some other socket is already using the specified address. + +@item EINVAL +The socket @var{socket} already has an address. + +@item EACCES +You do not have permission to access the requested address. (In the +Internet domain, only the super-user is allowed to specify a port number +in the range 0 through @code{IPPORT_RESERVED} minus one; see +@ref{Ports}.) +@end table + +Additional conditions may be possible depending on the particular namespace +of the socket. +@end deftypefun + +@node Reading Address +@subsection Reading the Address of a Socket + +@pindex sys/socket.h +Use the function @code{getsockname} to examine the address of an +Internet socket. The prototype for this function is in the header file +@file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftypefun int getsockname (int @var{socket}, struct sockaddr *@var{addr}, socklen_t *@var{length-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsmem{/hurd}}} +@c Direct syscall, except on Hurd, where it seems like it might leak +@c VM if cancelled. +The @code{getsockname} function returns information about the +address of the socket @var{socket} in the locations specified by the +@var{addr} and @var{length-ptr} arguments. Note that the +@var{length-ptr} is a pointer; you should initialize it to be the +allocation size of @var{addr}, and on return it contains the actual +size of the address data. 
+ +The format of the address data depends on the socket namespace. The +length of the information is usually fixed for a given namespace, so +normally you can know exactly how much space is needed and can provide +that much. The usual practice is to allocate a place for the value +using the proper data type for the socket's namespace, then cast its +address to @code{struct sockaddr *} to pass it to @code{getsockname}. + +The return value is @code{0} on success and @code{-1} on error. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{socket} argument is not a valid file descriptor. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. + +@item ENOBUFS +There are not enough internal buffers available for the operation. +@end table +@end deftypefun + +You can't read the address of a socket in the file namespace. This is +consistent with the rest of the system; in general, there's no way to +find a file's name from a descriptor for that file. + +@node Interface Naming +@section Interface Naming + +Each network interface has a name. This usually consists of a few +letters that relate to the type of interface, which may be followed by a +number if there is more than one interface of that type. Examples +might be @code{lo} (the loopback interface) and @code{eth0} (the first +Ethernet interface). + +Although such names are convenient for humans, it would be clumsy to +have to use them whenever a program needs to refer to an interface. In +such situations an interface is referred to by its @dfn{index}, which is +an arbitrarily-assigned small positive integer. + +The following functions, constants and data types are declared in the +header file @file{net/if.h}. + +@comment net/if.h +@deftypevr Constant size_t IFNAMSIZ +This constant defines the maximum buffer size needed to hold an +interface name, including its terminating zero byte. 
+@end deftypevr + +@comment net/if.h +@comment IPv6 basic API +@deftypefun {unsigned int} if_nametoindex (const char *@var{ifname}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c It opens a socket to use ioctl on the fd to get the index. +@c opensock may call socket and access multiple times until it finds a +@c socket family that works. The Linux implementation has a potential +@c concurrency issue WRT last_type and last_family not being updated +@c atomically, but it is harmless; the generic implementation, OTOH, +@c takes a lock, which makes all callers AS- and AC-Unsafe. +@c opensock @asulock @aculock @acsfd +This function yields the interface index corresponding to a particular +name. If no interface exists with the name given, it returns 0. +@end deftypefun + +@comment net/if.h +@comment IPv6 basic API +@deftypefun {char *} if_indextoname (unsigned int @var{ifindex}, char *@var{ifname}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c It opens a socket with opensock to use ioctl on the fd to get the +@c name from the index. +This function maps an interface index to its corresponding name. The +returned name is placed in the buffer pointed to by @var{ifname}, which +must be at least @code{IFNAMSIZ} bytes in length. If the index was +invalid, the function's return value is a null pointer; otherwise it is +@var{ifname}. +@end deftypefun + +@comment net/if.h +@comment IPv6 basic API +@deftp {Data Type} {struct if_nameindex} +This data type is used to hold the information about a single +interface. It has the following members: + +@table @code +@item unsigned int if_index +This is the interface index. + +@item char *if_name +This is the null-terminated interface name. 
+ +@end table +@end deftp + +@comment net/if.h +@comment IPv6 basic API +@deftypefun {struct if_nameindex *} if_nameindex (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{/hurd}}@acunsafe{@aculock{/hurd} @acsfd{} @acsmem{}}} +@c if_nameindex @ascuheap @asulock/hurd @aculock/hurd @acsfd @acsmem +@c [linux] +@c netlink_open @acsfd @acsmem/hurd +@c socket dup @acsfd +@c memset dup ok +@c bind dup ok +@c netlink_close dup @acsfd +@c getsockname dup @acsmem/hurd +@c netlink_request @ascuheap @acsmem +@c getpagesize dup ok +@c malloc dup @ascuheap @acsmem +@c netlink_sendreq ok +@c memset dup ok +@c sendto dup ok +@c recvmsg dup ok +@c memcpy dup ok +@c free dup @ascuheap @acsmem +@c netlink_free_handle @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c netlink_close @acsfd +@c close dup @acsfd +@c malloc dup @ascuheap @acsmem +@c strndup @ascuheap @acsmem +@c if_freenameindex @ascuheap @acsmem +@c [hurd] +@c opensock dup @asulock @aculock @acsfd +@c hurd_socket_server ok +@c pfinet_siocgifconf ok +@c malloc @ascuheap @acsmem +@c strdup @ascuheap @acsmem +@c ioctl dup ok +@c free @ascuheap @acsmem +This function returns an array of @code{if_nameindex} structures, one +for every interface that is present. The end of the list is indicated +by a structure with an interface index of 0 and a null name pointer. If an +error occurs, this function returns a null pointer. + +The returned structure must be freed with @code{if_freenameindex} after +use. +@end deftypefun + +@comment net/if.h +@comment IPv6 basic API +@deftypefun void if_freenameindex (struct if_nameindex *@var{ptr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c if_freenameindex @ascuheap @acsmem +@c free dup @ascuheap @acsmem +This function frees the structure returned by an earlier call to +@code{if_nameindex}. 
+@end deftypefun + +@node Local Namespace +@section The Local Namespace +@cindex local namespace, for sockets + +This section describes the details of the local namespace, whose +symbolic name (required when you create a socket) is @code{PF_LOCAL}. +The local namespace is also known as ``Unix domain sockets''. Another +name is file namespace since socket addresses are normally implemented +as file names. + +@menu +* Concepts: Local Namespace Concepts. What you need to understand. +* Details: Local Namespace Details. Address format, symbolic names, etc. +* Example: Local Socket Example. Example of creating a socket. +@end menu + +@node Local Namespace Concepts +@subsection Local Namespace Concepts + +In the local namespace socket addresses are file names. You can specify +any file name you want as the address of the socket, but you must have +write permission on the directory containing it. +@c XXX The following was said to be wrong. +@c In order to connect to a socket you must have read permission for it. +It's common to put these files in the @file{/tmp} directory. + +One peculiarity of the local namespace is that the name is only used +when opening the connection; once open the address is not meaningful and +may not exist. + +Another peculiarity is that you cannot connect to such a socket from +another machine--not even if the other machine shares the file system +which contains the name of the socket. You can see the socket in a +directory listing, but connecting to it never succeeds. Some programs +take advantage of this, such as by asking the client to send its own +process ID, and using the process IDs to distinguish between clients. +However, we recommend you not use this method in protocols you design, +as we might someday permit connections from other machines that mount +the same file systems. Instead, send each new client an identifying +number if you want it to have one. 
+ +After you close a socket in the local namespace, you should delete the +file name from the file system. Use @code{unlink} or @code{remove} to +do this; see @ref{Deleting Files}. + +The local namespace supports just one protocol for any communication +style; it is protocol number @code{0}. + +@node Local Namespace Details +@subsection Details of Local Namespace + +@pindex sys/socket.h +To create a socket in the local namespace, use the constant +@code{PF_LOCAL} as the @var{namespace} argument to @code{socket} or +@code{socketpair}. This constant is defined in @file{sys/socket.h}. + +@comment sys/socket.h +@comment POSIX +@deftypevr Macro int PF_LOCAL +This designates the local namespace, in which socket addresses are local +names, and its associated family of protocols. @code{PF_LOCAL} is the +macro used by POSIX.1g. +@end deftypevr + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int PF_UNIX +This is a synonym for @code{PF_LOCAL}, for compatibility's sake. +@end deftypevr + +@comment sys/socket.h +@comment GNU +@deftypevr Macro int PF_FILE +This is a synonym for @code{PF_LOCAL}, for compatibility's sake. +@end deftypevr + +The structure for specifying socket names in the local namespace is +defined in the header file @file{sys/un.h}: +@pindex sys/un.h + +@comment sys/un.h +@comment BSD +@deftp {Data Type} {struct sockaddr_un} +This structure is used to specify local namespace socket addresses. It has +the following members: + +@table @code +@item short int sun_family +This identifies the address family or format of the socket address. +You should store the value @code{AF_LOCAL} to designate the local +namespace. @xref{Socket Addresses}. + +@item char sun_path[108] +This is the file name to use. + +@strong{Incomplete:} Why is 108 a magic number? RMS suggests making +this a zero-length array and tweaking the following example to use +@code{alloca} to allocate an appropriate amount of storage based on +the length of the filename. 
+@end table +@end deftp + +You should compute the @var{length} parameter for a socket address in +the local namespace as the sum of the size of the @code{sun_family} +component and the string length (@emph{not} the allocation size!) of +the file name string. This can be done using the macro @code{SUN_LEN}: + +@comment sys/un.h +@comment BSD +@deftypefn {Macro} int SUN_LEN (@emph{struct sockaddr_un *} @var{ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro computes the length of the socket address in the local namespace. +@end deftypefn + +@node Local Socket Example +@subsection Example of Local-Namespace Sockets + +Here is an example showing how to create and name a socket in the local +namespace. + +@smallexample +@include mkfsock.c.texi +@end smallexample + +@node Internet Namespace +@section The Internet Namespace +@cindex Internet namespace, for sockets + +This section describes the details of the protocols and socket naming +conventions used in the Internet namespace. + +Originally the Internet namespace used only IP version 4 (IPv4). With +the growing number of hosts on the Internet, a new protocol with a +larger address space was necessary: IP version 6 (IPv6). IPv6 +introduces 128-bit addresses (IPv4 has 32-bit addresses) and other +features, and will eventually replace IPv4. + +To create a socket in the IPv4 Internet namespace, use the symbolic name +@code{PF_INET} of this namespace as the @var{namespace} argument to +@code{socket} or @code{socketpair}. For IPv6 addresses you need the +macro @code{PF_INET6}. These macros are defined in @file{sys/socket.h}. +@pindex sys/socket.h + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int PF_INET +This designates the IPv4 Internet namespace and associated family of +protocols. +@end deftypevr + +@comment sys/socket.h +@comment X/Open +@deftypevr Macro int PF_INET6 +This designates the IPv6 Internet namespace and associated family of +protocols. 
+@end deftypevr + +A socket address for the Internet namespace includes the following components: + +@itemize @bullet +@item +The address of the machine you want to connect to. Internet addresses +can be specified in several ways; these are discussed in @ref{Internet +Address Formats}, @ref{Host Addresses} and @ref{Host Names}. + +@item +A port number for that machine. @xref{Ports}. +@end itemize + +You must ensure that the address and port number are represented in a +canonical format called @dfn{network byte order}. @xref{Byte Order}, +for information about this. + +@menu +* Internet Address Formats:: How socket addresses are specified in the + Internet namespace. +* Host Addresses:: All about host addresses of Internet hosts. +* Ports:: Internet port numbers. +* Services Database:: Ports may have symbolic names. +* Byte Order:: Different hosts may use different byte + ordering conventions; you need to + canonicalize host address and port number. +* Protocols Database:: Referring to protocols by name. +* Inet Example:: Putting it all together. +@end menu + +@node Internet Address Formats +@subsection Internet Socket Address Formats + +In the Internet namespace, for both IPv4 (@code{AF_INET}) and IPv6 +(@code{AF_INET6}), a socket address consists of a host address +and a port on that host. In addition, the protocol you choose serves +effectively as a part of the address because local port numbers are +meaningful only within a particular protocol. + +The data types for representing socket addresses in the Internet namespace +are defined in the header file @file{netinet/in.h}. +@pindex netinet/in.h + +@comment netinet/in.h +@comment BSD +@deftp {Data Type} {struct sockaddr_in} +This is the data type used to represent socket addresses in the +Internet namespace. It has the following members: + +@table @code +@item sa_family_t sin_family +This identifies the address family or format of the socket address. +You should store the value @code{AF_INET} in this member. 
+@xref{Socket Addresses}. + +@item struct in_addr sin_addr +This is the Internet address of the host machine. @xref{Host +Addresses}, and @ref{Host Names}, for how to get a value to store +here. + +@item unsigned short int sin_port +This is the port number. @xref{Ports}. +@end table +@end deftp + +When you call @code{bind} or @code{getsockname}, you should specify +@code{sizeof (struct sockaddr_in)} as the @var{length} parameter if +you are using an IPv4 Internet namespace socket address. + +@deftp {Data Type} {struct sockaddr_in6} +This is the data type used to represent socket addresses in the IPv6 +namespace. It has the following members: + +@table @code +@item sa_family_t sin6_family +This identifies the address family or format of the socket address. +You should store the value of @code{AF_INET6} in this member. +@xref{Socket Addresses}. + +@item struct in6_addr sin6_addr +This is the IPv6 address of the host machine. @xref{Host +Addresses}, and @ref{Host Names}, for how to get a value to store +here. + +@item uint32_t sin6_flowinfo +This is a currently unimplemented field. + +@item uint16_t sin6_port +This is the port number. @xref{Ports}. + +@end table +@end deftp + +@node Host Addresses +@subsection Host Addresses + +Each computer on the Internet has one or more @dfn{Internet addresses}, +numbers which identify that computer among all those on the Internet. +Users typically write IPv4 numeric host addresses as sequences of four +numbers, separated by periods, as in @samp{128.52.46.32}, and IPv6 +numeric host addresses as sequences of up to eight numbers separated by +colons, as in @samp{5f03:1200:836f:c100::1}. + +Each computer also has one or more @dfn{host names}, which are strings +of words separated by periods, as in @samp{www.gnu.org}. + +Programs that let the user specify a host typically accept both numeric +addresses and host names. 
To open a connection a program needs a +numeric address, and so must convert a host name to the numeric address +it stands for. + +@menu +* Abstract Host Addresses:: What a host number consists of. +* Data type: Host Address Data Type. Data type for a host number. +* Functions: Host Address Functions. Functions to operate on them. +* Names: Host Names. Translating host names to host numbers. +@end menu + +@node Abstract Host Addresses +@subsubsection Internet Host Addresses +@cindex host address, Internet +@cindex Internet host address + +@ifinfo +Each computer on the Internet has one or more Internet addresses, +numbers which identify that computer among all those on the Internet. +@end ifinfo + +@cindex network number +@cindex local network address number +An IPv4 Internet host address is a number containing four bytes of data. +Historically these are divided into two parts, a @dfn{network number} and a +@dfn{local network address number} within that network. In the +mid-1990s classless addresses were introduced which changed this +behavior. Since some functions implicitly expect the old definitions, +we first describe the class-based network and will then describe +classless addresses. IPv6 uses only classless addresses and therefore +the following paragraphs don't apply. + +The class-based IPv4 network number consists of the first one, two or +three bytes; the rest of the bytes are the local address. + +IPv4 network numbers are registered with the Network Information Center +(NIC), and are divided into three classes---A, B and C. The local +network address numbers of individual machines are registered with the +administrator of the particular network. + +Class A networks have single-byte numbers in the range 0 to 127. There +are only a small number of Class A networks, but they can each support a +very large number of hosts. Medium-sized Class B networks have two-byte +network numbers, with the first byte in the range 128 to 191. 
Class C +networks are the smallest; they have three-byte network numbers, with +the first byte in the range 192-255. Thus, the first 1, 2, or 3 bytes +of an Internet address specify a network. The remaining bytes of the +Internet address specify the address within that network. + +The Class A network 0 is reserved for broadcast to all networks. In +addition, the host number 0 within each network is reserved for broadcast +to all hosts in that network. These uses are obsolete now but for +compatibility reasons you shouldn't use network 0 and host number 0. + +The Class A network 127 is reserved for loopback; you can always use +the Internet address @samp{127.0.0.1} to refer to the host machine. + +Since a single machine can be a member of multiple networks, it can +have multiple Internet host addresses. However, there is never +supposed to be more than one machine with the same host address. + +@c !!! this section could document the IN_CLASS* macros in <netinet/in.h>. +@c No, it shouldn't since they're obsolete. + +@cindex standard dot notation, for Internet addresses +@cindex dot notation, for Internet addresses +There are four forms of the @dfn{standard numbers-and-dots notation} +for Internet addresses: + +@table @code +@item @var{a}.@var{b}.@var{c}.@var{d} +This specifies all four bytes of the address individually and is the +commonly used representation. + +@item @var{a}.@var{b}.@var{c} +The last part of the address, @var{c}, is interpreted as a 2-byte quantity. +This is useful for specifying host addresses in a Class B network with +network address number @code{@var{a}.@var{b}}. + +@item @var{a}.@var{b} +The last part of the address, @var{b}, is interpreted as a 3-byte quantity. +This is useful for specifying host addresses in a Class A network with +network address number @var{a}. + +@item @var{a} +If only one part is given, this corresponds directly to the host address +number. 
+@end table + +Within each part of the address, the usual C conventions for specifying +the radix apply. In other words, a leading @samp{0x} or @samp{0X} implies +hexadecimal radix; a leading @samp{0} implies octal; and otherwise decimal +radix is assumed. + +@subsubheading Classless Addresses + +IPv4 addresses (and IPv6 addresses also) are now considered classless; +the distinction between classes A, B and C can be ignored. Instead an +IPv4 host address consists of a 32-bit address and a 32-bit mask. The +mask contains set bits for the network part and cleared bits for the +host part. The network part is contiguous from the left, with the +remaining bits representing the host. As a consequence, the netmask can +simply be specified as the number of set bits. Classes A, B and C are +just special cases of this general rule. For example, class A addresses +have a netmask of @samp{255.0.0.0} or a prefix length of 8. + +Classless IPv4 network addresses are written in numbers-and-dots +notation with the prefix length appended and a slash as separator. For +example the class A network 10 is written as @samp{10.0.0.0/8}. + +@subsubheading IPv6 Addresses + +IPv6 addresses contain 128 bits (IPv4 has 32 bits) of data. A host +address is usually written as eight 16-bit hexadecimal numbers that are +separated by colons. Two colons are used to abbreviate strings of +consecutive zeros. For example, the IPv6 loopback address +@samp{0:0:0:0:0:0:0:1} can just be written as @samp{::1}. + +@node Host Address Data Type +@subsubsection Host Address Data Type + +IPv4 Internet host addresses are represented in some contexts as integers +(type @code{uint32_t}). In other contexts, the integer is +packaged inside a structure of type @code{struct in_addr}. It would +be better if the usage were made consistent, but it is not hard to extract +the integer from the structure or put the integer into a structure. 
+ +You will find older code that uses @code{unsigned long int} for +IPv4 Internet host addresses instead of @code{uint32_t} or @code{struct +in_addr}. Historically @code{unsigned long int} was a 32-bit number but +with 64-bit machines this has changed. Using @code{unsigned long int} +might break the code if it is used on machines where this type doesn't +have 32 bits. @code{uint32_t} is specified by Unix98 and guaranteed to have +32 bits. + +IPv6 Internet host addresses have 128 bits and are packaged inside a +structure of type @code{struct in6_addr}. + +The following basic definitions for Internet addresses are declared in +the header file @file{netinet/in.h}: +@pindex netinet/in.h + +@comment netinet/in.h +@comment BSD +@deftp {Data Type} {struct in_addr} +This data type is used in certain contexts to contain an IPv4 Internet +host address. It has just one field, named @code{s_addr}, which records +the host address number as an @code{uint32_t}. +@end deftp + +@comment netinet/in.h +@comment BSD +@deftypevr Macro {uint32_t} INADDR_LOOPBACK +You can use this constant to stand for ``the address of this machine,'' +instead of finding its actual address. It is the IPv4 Internet address +@samp{127.0.0.1}, which is usually called @samp{localhost}. This +special constant saves you the trouble of looking up the address of your +own machine. Also, the system usually implements @code{INADDR_LOOPBACK} +specially, avoiding any network traffic for the case of one machine +talking to itself. +@end deftypevr + +@comment netinet/in.h +@comment BSD +@deftypevr Macro {uint32_t} INADDR_ANY +You can use this constant to stand for ``any incoming address'' when +binding to an address. @xref{Setting Address}. This is the usual +address to give in the @code{sin_addr} member of @w{@code{struct +sockaddr_in}} when you want to accept Internet connections. 
+@end deftypevr + +@comment netinet/in.h +@comment BSD +@deftypevr Macro {uint32_t} INADDR_BROADCAST +This constant is the address you use to send a broadcast message. +@c !!! broadcast needs further documented +@end deftypevr + +@comment netinet/in.h +@comment BSD +@deftypevr Macro {uint32_t} INADDR_NONE +This constant is returned by some functions to indicate an error. +@end deftypevr + +@comment netinet/in.h +@comment IPv6 basic API +@deftp {Data Type} {struct in6_addr} +This data type is used to store an IPv6 address. It stores 128 bits of +data, which can be accessed (via a union) in a variety of ways. +@end deftp + +@comment netinet/in.h +@comment IPv6 basic API +@deftypevr Constant {struct in6_addr} in6addr_loopback +This constant is the IPv6 address @samp{::1}, the loopback address. See +above for a description of what this means. The macro +@code{IN6ADDR_LOOPBACK_INIT} is provided to allow you to initialize your +own variables to this value. +@end deftypevr + +@comment netinet/in.h +@comment IPv6 basic API +@deftypevr Constant {struct in6_addr} in6addr_any +This constant is the IPv6 address @samp{::}, the unspecified address. See +above for a description of what this means. The macro +@code{IN6ADDR_ANY_INIT} is provided to allow you to initialize your +own variables to this value. +@end deftypevr + +@node Host Address Functions +@subsubsection Host Address Functions + +@pindex arpa/inet.h +@noindent +These additional functions for manipulating Internet addresses are +declared in the header file @file{arpa/inet.h}. They represent Internet +addresses in network byte order, and network numbers and +local-address-within-network numbers in host byte order. @xref{Byte +Order}, for an explanation of network and host byte order. 
+ +@comment arpa/inet.h +@comment BSD +@deftypefun int inet_aton (const char *@var{name}, struct in_addr *@var{addr}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c inet_aton @mtslocale +@c isdigit dup @mtslocale +@c strtoul dup @mtslocale +@c isascii dup @mtslocale +@c isspace dup @mtslocale +@c htonl dup ok +This function converts the IPv4 Internet host address @var{name} +from the standard numbers-and-dots notation into binary data and stores +it in the @code{struct in_addr} that @var{addr} points to. +@code{inet_aton} returns nonzero if the address is valid, zero if not. +@end deftypefun + +@comment arpa/inet.h +@comment BSD +@deftypefun {uint32_t} inet_addr (const char *@var{name}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c inet_addr @mtslocale +@c inet_aton dup @mtslocale +This function converts the IPv4 Internet host address @var{name} from the +standard numbers-and-dots notation into binary data. If the input is +not valid, @code{inet_addr} returns @code{INADDR_NONE}. This is an +obsolete interface to @code{inet_aton}, described immediately above. It +is obsolete because @code{INADDR_NONE} is a valid address +(255.255.255.255), and @code{inet_aton} provides a cleaner way to +indicate error return. +@end deftypefun + +@comment arpa/inet.h +@comment BSD +@deftypefun {uint32_t} inet_network (const char *@var{name}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c inet_network @mtslocale +@c isdigit dup @mtslocale +@c isxdigit dup @mtslocale +@c tolower dup @mtslocale +@c isspace dup @mtslocale +This function extracts the network number from the address @var{name}, +given in the standard numbers-and-dots notation. The returned address is +in host order. If the input is not valid, @code{inet_network} returns +@code{-1}. + +The function works only with traditional IPv4 class A, B and C network +types. It doesn't work with classless addresses and shouldn't be used +anymore. 
+@end deftypefun + +@comment arpa/inet.h +@comment BSD +@deftypefun {char *} inet_ntoa (struct in_addr @var{addr}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asurace{}}@acsafe{}} +@c inet_ntoa @mtslocale @asurace +@c writes to a thread-local static buffer +@c snprintf @mtslocale [no @ascuheap or @acsmem] +This function converts the IPv4 Internet host address @var{addr} to a +string in the standard numbers-and-dots notation. The return value is +a pointer into a statically-allocated buffer. Subsequent calls will +overwrite the same buffer, so you should copy the string if you need +to save it. + +In multi-threaded programs each thread has its own statically-allocated +buffer. But still subsequent calls of @code{inet_ntoa} in the same +thread will overwrite the result of the last call. + +Instead of @code{inet_ntoa} the newer function @code{inet_ntop} which is +described below should be used since it handles both IPv4 and IPv6 +addresses. +@end deftypefun + +@comment arpa/inet.h +@comment BSD +@deftypefun {struct in_addr} inet_makeaddr (uint32_t @var{net}, uint32_t @var{local}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c inet_makeaddr ok +@c htonl dup ok +This function makes an IPv4 Internet host address by combining the network +number @var{net} with the local-address-within-network number +@var{local}. +@end deftypefun + +@comment arpa/inet.h +@comment BSD +@deftypefun uint32_t inet_lnaof (struct in_addr @var{addr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c inet_lnaof ok +@c ntohl dup ok +@c IN_CLASSA ok +@c IN_CLASSB ok +This function returns the local-address-within-network part of the +Internet host address @var{addr}. + +The function works only with traditional IPv4 class A, B and C network +types. It doesn't work with classless addresses and shouldn't be used +anymore. 
+@end deftypefun + +@comment arpa/inet.h +@comment BSD +@deftypefun uint32_t inet_netof (struct in_addr @var{addr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c inet_netof ok +@c ntohl dup ok +@c IN_CLASSA ok +@c IN_CLASSB ok +This function returns the network number part of the Internet host +address @var{addr}. + +The function works only with traditional IPv4 class A, B and C network +types. It doesn't work with classless addresses and shouldn't be used +anymore. +@end deftypefun + +@comment arpa/inet.h +@comment IPv6 basic API +@deftypefun int inet_pton (int @var{af}, const char *@var{cp}, void *@var{buf}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c inet_pton @mtslocale +@c inet_pton4 ok +@c memcpy dup ok +@c inet_pton6 @mtslocale +@c memset dup ok +@c tolower dup @mtslocale +@c strchr dup ok +@c inet_pton4 dup ok +@c memcpy dup ok +This function converts an Internet address (either IPv4 or IPv6) from +presentation (textual) to network (binary) format. @var{af} should be +either @code{AF_INET} or @code{AF_INET6}, as appropriate for the type of +address being converted. @var{cp} is a pointer to the input string, and +@var{buf} is a pointer to a buffer for the result. It is the caller's +responsibility to make sure the buffer is large enough. +@end deftypefun + +@comment arpa/inet.h +@comment IPv6 basic API +@deftypefun {const char *} inet_ntop (int @var{af}, const void *@var{cp}, char *@var{buf}, socklen_t @var{len}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c inet_ntop @mtslocale +@c inet_ntop4 @mtslocale +@c sprintf dup @mtslocale [no @ascuheap or @acsmem] +@c strcpy dup ok +@c inet_ntop6 @mtslocale +@c memset dup ok +@c inet_ntop4 dup @mtslocale +@c sprintf dup @mtslocale [no @ascuheap or @acsmem] +@c strcpy dup ok +This function converts an Internet address (either IPv4 or IPv6) from +network (binary) to presentation (textual) form. @var{af} should be +either @code{AF_INET} or @code{AF_INET6}, as appropriate. 
@var{cp} is a +pointer to the address to be converted. @var{buf} should be a pointer +to a buffer to hold the result, and @var{len} is the length of this +buffer. The return value from the function will be this buffer address. +@end deftypefun + +@node Host Names +@subsubsection Host Names +@cindex hosts database +@cindex converting host name to address +@cindex converting host address to name + +Besides the standard numbers-and-dots notation for Internet addresses, +you can also refer to a host by a symbolic name. The advantage of a +symbolic name is that it is usually easier to remember. For example, +the machine with Internet address @samp{158.121.106.19} is also known as +@samp{alpha.gnu.org}; and other machines in the @samp{gnu.org} +domain can refer to it simply as @samp{alpha}. + +@pindex /etc/hosts +@pindex netdb.h +Internally, the system uses a database to keep track of the mapping +between host names and host numbers. This database is usually either +the file @file{/etc/hosts} or an equivalent provided by a name server. +The functions and other symbols for accessing this database are declared +in @file{netdb.h}. They are BSD features, defined unconditionally if +you include @file{netdb.h}. + +@comment netdb.h +@comment BSD +@deftp {Data Type} {struct hostent} +This data type is used to represent an entry in the hosts database. It +has the following members: + +@table @code +@item char *h_name +This is the ``official'' name of the host. + +@item char **h_aliases +These are alternative names for the host, represented as a null-terminated +vector of strings. + +@item int h_addrtype +This is the host address type; in practice, its value is always either +@code{AF_INET} or @code{AF_INET6}, with the latter being used for IPv6 +hosts. In principle other kinds of addresses could be represented in +the database as well as Internet addresses; if this were done, you +might find a value in this field other than @code{AF_INET} or +@code{AF_INET6}. 
@xref{Socket Addresses}. + +@item int h_length +This is the length, in bytes, of each address. + +@item char **h_addr_list +This is the vector of addresses for the host. (Recall that the host +might be connected to multiple networks and have different addresses on +each one.) The vector is terminated by a null pointer. + +@item char *h_addr +This is a synonym for @code{h_addr_list[0]}; in other words, it is the +first host address. +@end table +@end deftp + +As far as the host database is concerned, each address is just a block +of memory @code{h_length} bytes long. But in other contexts there is an +implicit assumption that you can convert IPv4 addresses to a +@code{struct in_addr} or a @code{uint32_t}. Host addresses in +a @code{struct hostent} structure are always given in network byte +order; see @ref{Byte Order}. + +You can use @code{gethostbyname}, @code{gethostbyname2} or +@code{gethostbyaddr} to search the hosts database for information about +a particular host. The information is returned in a +statically-allocated structure; you must copy the information if you +need to save it across calls. You can also use @code{getaddrinfo} and +@code{getnameinfo} to obtain this information.
+ +@comment netdb.h +@comment BSD +@deftypefun {struct hostent *} gethostbyname (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:hostbyname} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c gethostbyname @mtasurace:hostbyname @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c nss_hostname_digits_dots @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_maybe_init(!preinit) @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_iclose @acsuheap @acsmem @acsfd +@c close_not_cancel_no_status dup @acsfd +@c free dup @acsuheap @acsmem +@c res_vinit @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_randomid ok +@c getpid dup ok +@c getenv dup @mtsenv +@c strncpy dup ok +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking dup ok [no concurrent uses] +@c fgets_unlocked dup ok [no concurrent uses] +@c MATCH ok +@c strncmp dup ok +@c strpbrk dup ok +@c strchr dup ok +@c inet_aton dup @mtslocale +@c htons dup +@c inet_pton dup @mtslocale +@c malloc dup @ascuheap @acsmem +@c IN6_IS_ADDR_LINKLOCAL ok +@c htonl dup ok +@c IN6_IS_ADDR_MC_LINKLOCAL ok +@c if_nametoindex dup @asulock @aculock @acsfd +@c strtoul dup @mtslocale +@c ISSORTMASK ok +@c strchr dup ok +@c isascii dup @mtslocale +@c isspace dup @mtslocale +@c net_mask ok +@c ntohl dup ok +@c IN_CLASSA dup ok +@c htonl dup ok +@c IN_CLASSB dup ok +@c res_setoptions @mtslocale +@c strncmp dup ok +@c atoi dup @mtslocale +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +@c inet_makeaddr dup ok +@c gethostname dup ok +@c strcpy dup ok +@c rawmemchr dup ok +@c res_ninit @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_vinit dup @mtsenv @mtslocale @ascuheap @asulock 
@aculock @acsmem @acsfd +@c isdigit dup @mtslocale +@c isxdigit dup @mtslocale +@c strlen dup ok +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c memset dup ok +@c inet_aton dup @mtslocale +@c inet_pton dup @mtslocale +@c strcpy dup ok +@c memcpy dup ok +@c strchr dup ok +@c gethostbyname_r dup @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c set_h_errno ok +The @code{gethostbyname} function returns information about the host +named @var{name}. If the lookup fails, it returns a null pointer. +@end deftypefun + +@comment netdb.h +@comment IPv6 Basic API +@deftypefun {struct hostent *} gethostbyname2 (const char *@var{name}, int @var{af}) +@safety{@prelim{}@mtunsafe{@mtasurace{:hostbyname2} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c gethostbyname2 @mtasurace:hostbyname2 @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c nss_hostname_digits_dots dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c gethostbyname2_r dup @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c set_h_errno dup ok +The @code{gethostbyname2} function is like @code{gethostbyname}, but +allows the caller to specify the desired address family (e.g.@: +@code{AF_INET} or @code{AF_INET6}) of the result. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct hostent *} gethostbyaddr (const void *@var{addr}, socklen_t @var{length}, int @var{format}) +@safety{@prelim{}@mtunsafe{@mtasurace{:hostbyaddr} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c gethostbyaddr @mtasurace:hostbyaddr @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c gethostbyaddr_r dup @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c set_h_errno dup ok +The @code{gethostbyaddr} function returns information about the host +with Internet address @var{addr}. The parameter @var{addr} is not +really a pointer to char - it can be a pointer to an IPv4 or an IPv6 +address. The @var{length} argument is the size (in bytes) of the address +at @var{addr}. @var{format} specifies the address format; for an IPv4 +Internet address, specify a value of @code{AF_INET}; for an IPv6 +Internet address, use @code{AF_INET6}. + +If the lookup fails, @code{gethostbyaddr} returns a null pointer. +@end deftypefun + +@vindex h_errno +If the name lookup by @code{gethostbyname} or @code{gethostbyaddr} +fails, you can find out the reason by looking at the value of the +variable @code{h_errno}. (It would be cleaner design for these +functions to set @code{errno}, but use of @code{h_errno} is compatible +with other systems.) + +Here are the error codes that you may find in @code{h_errno}: + +@vtable @code +@comment netdb.h +@comment BSD +@item HOST_NOT_FOUND +No such host is known in the database. 
+ +@comment netdb.h +@comment BSD +@item TRY_AGAIN +This condition happens when the name server could not be contacted. If +you try again later, you may succeed then. + +@comment netdb.h +@comment BSD +@item NO_RECOVERY +A non-recoverable error occurred. + +@comment netdb.h +@comment BSD +@item NO_ADDRESS +The host database contains an entry for the name, but it doesn't have an +associated Internet address. +@end vtable + +The lookup functions above all have one thing in common: they are not +reentrant and therefore unusable in multi-threaded applications. +Therefore @theglibc{} provides a new set of functions which can be +used in this context. + +@comment netdb.h +@comment GNU +@deftypefun int gethostbyname_r (const char *restrict @var{name}, struct hostent *restrict @var{result_buf}, char *restrict @var{buf}, size_t @var{buflen}, struct hostent **restrict @var{result}, int *restrict @var{h_errnop}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c gethostbyname_r @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c nss_hostname_digits_dots dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c nscd_gethostbyname_r @mtsenv @ascuheap @acsfd @acsmem +@c nscd_gethst_r @mtsenv @ascuheap @acsfd @acsmem +@c getenv dup @mtsenv +@c nscd_get_map_ref dup @ascuheap @acsfd @acsmem +@c nscd_cache_search dup ok +@c memcpy dup ok +@c nscd_open_socket dup @acsfd +@c readvall dup ok +@c readall dup ok +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref dup @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_hconf_init @mtsenv @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem [no @asuinit:reshconf @acuinit:reshconf, conditionally called] +@c
res_hconf.c:do_init @mtsenv @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c memset dup ok +@c getenv dup @mtsenv +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking dup ok [no concurrent uses] +@c fgets_unlocked dup ok [no concurrent uses] +@c strchrnul dup ok +@c res_hconf.c:parse_line @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c skip_ws dup @mtslocale +@c skip_string dup @mtslocale +@c strncasecmp dup @mtslocale +@c strlen dup ok +@c asprintf dup @mtslocale @ascuheap @acsmem +@c fxprintf dup @asucorrupt @aculock @acucorrupt +@c free dup @ascuheap @acsmem +@c arg_trimdomain_list dup @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c arg_spoof dup @mtslocale +@c arg_bool dup @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c isspace dup @mtslocale +@c fclose dup @ascuheap @asulock @acsmem @acsfd @aculock +@c arg_spoof @mtslocale +@c skip_string @mtslocale +@c isspace dup @mtslocale +@c strncasecmp dup @mtslocale +@c arg_bool @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c strncasecmp dup @mtslocale +@c asprintf dup @mtslocale @ascuheap @acsmem +@c fxprintf dup @asucorrupt @aculock @acucorrupt +@c free dup @ascuheap @acsmem +@c arg_trimdomain_list @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem +@c skip_string dup @mtslocale +@c asprintf dup @mtslocale @ascuheap @acsmem +@c fxprintf dup @asucorrupt @aculock @acucorrupt +@c free dup @ascuheap @acsmem +@c strndup dup @ascuheap @acsmem +@c skip_ws @mtslocale +@c isspace dup @mtslocale +@c nss_hosts_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_database_lookup dup @mtslocale @ascuheap @asulock @acucorrupt @acsmem @acsfd @aculock +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_gethostbyname_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock 
@acucorrupt @aculock @acsfd @acsmem +@c res_hconf_reorder_addrs @asulock @ascuheap @aculock @acsmem @acsfd +@c socket dup @acsfd +@c libc_lock_lock dup @asulock @aculock +@c ifreq @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c if_nextreq dup ok +@c ioctl dup ok +@c realloc dup @ascuheap @acsmem +@c if_freereq dup @acsmem +@c libc_lock_unlock dup @aculock +@c close dup @acsfd +The @code{gethostbyname_r} function returns information about the host +named @var{name}. The caller must pass a pointer to an object of type +@code{struct hostent} in the @var{result_buf} parameter. In addition +the function may need extra buffer space and the caller must pass a +pointer and the size of the buffer in the @var{buf} and @var{buflen} +parameters. + +A pointer to the buffer, in which the result is stored, is available in +@code{*@var{result}} after the function call successfully returned. The +buffer passed as the @var{buf} parameter can be freed only once the caller +has finished with the result hostent struct, or has copied it including all +the other memory that it points to. If an error occurs or if no entry is +found, the pointer @code{*@var{result}} is a null pointer. Success is +signalled by a zero return value. If the function failed the return value +is an error number. In addition to the errors defined for +@code{gethostbyname} it can also be @code{ERANGE}. In this case the call +should be repeated with a larger buffer. Additional error information is +not stored in the global variable @code{h_errno} but instead in the object +pointed to by @var{h_errnop}. + +Here's a small example: +@smallexample +struct hostent * +gethostname (char *host) +@{ + struct hostent *hostbuf, *hp; + size_t hstbuflen; + char *tmphstbuf; + int res; + int herr; + + hostbuf = malloc (sizeof (struct hostent)); + hstbuflen = 1024; + tmphstbuf = malloc (hstbuflen); + + while ((res = gethostbyname_r (host, hostbuf, tmphstbuf, hstbuflen, + &hp, &herr)) == ERANGE) + @{ + /* Enlarge the buffer. 
*/ + hstbuflen *= 2; + tmphstbuf = realloc (tmphstbuf, hstbuflen); + @} + + /* Check for errors. */ + if (res || hp == NULL) + @{ + free (tmphstbuf); + free (hostbuf); + return NULL; + @} + /* Do not free tmphstbuf here: the members of *hp point into it. */ + return hp; +@} +@end smallexample +@end deftypefun + +@comment netdb.h +@comment GNU +@deftypefun int gethostbyname2_r (const char *@var{name}, int @var{af}, struct hostent *restrict @var{result_buf}, char *restrict @var{buf}, size_t @var{buflen}, struct hostent **restrict @var{result}, int *restrict @var{h_errnop}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c gethostbyname2_r @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c nss_hostname_digits_dots dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c nscd_gethostbyname2_r @mtsenv @ascuheap @asulock @aculock @acsfd @acsmem +@c nscd_gethst_r dup @mtsenv @ascuheap @asulock @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_hconf_init dup @mtsenv @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem [no @asuinit:reshconf @acuinit:reshconf, conditionally called] +@c nss_hosts_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_gethostbyname2_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_hconf_reorder_addrs dup @asulock @ascuheap @aculock @acsmem @acsfd +The @code{gethostbyname2_r} function is like @code{gethostbyname_r}, but +allows the caller to specify the desired address family (e.g.@: +@code{AF_INET} or @code{AF_INET6}) for the result.
+@end deftypefun + +@comment netdb.h +@comment GNU +@deftypefun int gethostbyaddr_r (const void *@var{addr}, socklen_t @var{length}, int @var{format}, struct hostent *restrict @var{result_buf}, char *restrict @var{buf}, size_t @var{buflen}, struct hostent **restrict @var{result}, int *restrict @var{h_errnop}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c gethostbyaddr_r @mtsenv @mtslocale @ascudlopen @ascuplugin @asucorrupt @ascuheap @asulock @aculock @acucorrupt @acsmem @acsfd +@c memcmp dup ok +@c nscd_gethostbyaddr_r @mtsenv @ascuheap @asulock @aculock @acsfd @acsmem +@c nscd_gethst_r dup @mtsenv @ascuheap @asulock @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c res_hconf_init dup @mtsenv @mtslocale @asucorrupt @ascuheap @aculock @acucorrupt @acsmem [no @asuinit:reshconf @acuinit:reshconf, conditionally called] +@c nss_hosts_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_gethostbyaddr_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_hconf_reorder_addrs dup @asulock @ascuheap @aculock @acsmem @acsfd +@c res_hconf_trim_domains @mtslocale +@c res_hconf_trim_domain @mtslocale +@c strlen dup ok +@c strcasecmp dup @mtslocale +The @code{gethostbyaddr_r} function returns information about the host +with Internet address @var{addr}. The parameter @var{addr} is not +really a pointer to char - it can be a pointer to an IPv4 or an IPv6 +address. The @var{length} argument is the size (in bytes) of the address +at @var{addr}. @var{format} specifies the address format; for an IPv4 +Internet address, specify a value of @code{AF_INET}; for an IPv6 +Internet address, use @code{AF_INET6}. 
+ +Similar to the @code{gethostbyname_r} function, the caller must provide +buffers for the result and memory used internally. In case of success +the function returns zero. Otherwise the value is an error number where +@code{ERANGE} has the special meaning that the caller-provided buffer is +too small. +@end deftypefun + +You can also scan the entire hosts database one entry at a time using +@code{sethostent}, @code{gethostent} and @code{endhostent}. Be careful +when using these functions because they are not reentrant. + +@comment netdb.h +@comment BSD +@deftypefun void sethostent (int @var{stayopen}) +@safety{@prelim{}@mtunsafe{@mtasurace{:hostent} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c sethostent @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_setent(nss_hosts_lookup2) @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c set_h_errno dup ok +@c setup(nss_hosts_lookup2) @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *lookup_fct = nss_hosts_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:hostent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock dup @aculock +This function opens the hosts database to begin scanning it. You can +then call @code{gethostent} to read the entries. 
+ +@c There was a rumor that this flag has different meaning if using the DNS, +@c but it appears this description is accurate in that case also. +If the @var{stayopen} argument is nonzero, this sets a flag so that +subsequent calls to @code{gethostbyname} or @code{gethostbyaddr} will +not close the database (as they usually would). This makes for more +efficiency if you call those functions several times, by avoiding +reopening the database for each call. +@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct hostent *} gethostent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:hostent} @mtasurace{:hostentbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c gethostent @mtasurace:hostent @mtasurace:hostentbuf @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent(gethostent_r) @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c malloc dup @ascuheap @acsmem +@c *func = gethostent_r dup @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c gethostent_r @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent_r(nss_hosts_lookup2) @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c setup(nss_hosts_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:hostent @ascuplugin +@c nss_next2 
dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *sfct.f @mtasurace:hostent @ascuplugin +@c libc_lock_unlock dup @aculock + +This function returns the next entry in the hosts database. It +returns a null pointer if there are no more entries. +@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun void endhostent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:hostent} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endhostent @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c nss_endent(nss_hosts_lookup2) @mtasurace:hostent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c setup(nss_passwd_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:hostent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock @aculock +This function closes the hosts database. +@end deftypefun + +@node Ports +@subsection Internet Ports +@cindex port number + +A socket address in the Internet namespace consists of a machine's +Internet address plus a @dfn{port number} which distinguishes the +sockets on a given machine (for a given protocol). Port numbers range +from 0 to 65,535. + +Port numbers less than @code{IPPORT_RESERVED} are reserved for standard +servers, such as @code{finger} and @code{telnet}. 
There is a database +that keeps track of these, and you can use the @code{getservbyname} +function to map a service name onto a port number; see @ref{Services +Database}. + +If you write a server that is not one of the standard ones defined in +the database, you must choose a port number for it. Use a number +greater than @code{IPPORT_USERRESERVED}; such numbers are reserved for +servers and won't ever be generated automatically by the system. +Avoiding conflicts with servers being run by other users is up to you. + +When you use a socket without specifying its address, the system +generates a port number for it. This number is between +@code{IPPORT_RESERVED} and @code{IPPORT_USERRESERVED}. + +On the Internet, it is actually legitimate to have two different +sockets with the same port number, as long as they never both try to +communicate with the same socket address (host address plus port +number). You shouldn't duplicate a port number except in special +circumstances where a higher-level protocol requires it. Normally, +the system won't let you do it; @code{bind} normally insists on +distinct port numbers. To reuse a port number, you must set the +socket option @code{SO_REUSEADDR}. @xref{Socket-Level Options}. + +@pindex netinet/in.h +These macros are defined in the header file @file{netinet/in.h}. + +@comment netinet/in.h +@comment BSD +@deftypevr Macro int IPPORT_RESERVED +Port numbers less than @code{IPPORT_RESERVED} are reserved for +superuser use. +@end deftypevr + +@comment netinet/in.h +@comment BSD +@deftypevr Macro int IPPORT_USERRESERVED +Port numbers greater than or equal to @code{IPPORT_USERRESERVED} are +reserved for explicit use; they will never be allocated automatically. 
+@end deftypevr + +@node Services Database +@subsection The Services Database +@cindex services database +@cindex converting service name to port number +@cindex converting port number to service name + +@pindex /etc/services +The database that keeps track of ``well-known'' services is usually +either the file @file{/etc/services} or an equivalent from a name server. +You can use these utilities, declared in @file{netdb.h}, to access +the services database. +@pindex netdb.h + +@comment netdb.h +@comment BSD +@deftp {Data Type} {struct servent} +This data type holds information about entries from the services database. +It has the following members: + +@table @code +@item char *s_name +This is the ``official'' name of the service. + +@item char **s_aliases +These are alternate names for the service, represented as an array of +strings. A null pointer terminates the array. + +@item int s_port +This is the port number for the service. Port numbers are given in +network byte order; see @ref{Byte Order}. + +@item char *s_proto +This is the name of the protocol to use with this service. +@xref{Protocols Database}. +@end table +@end deftp + +To get information about a particular service, use the +@code{getservbyname} or @code{getservbyport} functions. The information +is returned in a statically-allocated structure; you must copy the +information if you need to save it across calls. 
+ +@comment netdb.h +@comment BSD +@deftypefun {struct servent *} getservbyname (const char *@var{name}, const char *@var{proto}) +@safety{@prelim{}@mtunsafe{@mtasurace{:servbyname} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getservbyname =~ getpwuid @mtasurace:servbyname @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getservbyname_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getservbyname_r =~ getpwuid_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getservbyname_r @ascuheap @acsfd @acsmem +@c nscd_getserv_r @ascuheap @acsfd @acsmem +@c nscd_get_map_ref dup @ascuheap @acsfd @acsmem +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c mempcpy dup ok +@c memcpy dup ok +@c nscd_cache_search dup ok +@c nscd_open_socket dup @acsfd +@c readvall dup ok +@c readall dup ok +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref dup @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c nss_services_lookup2 =~ nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getservbyname_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getservbyname} function returns information about the +service named @var{name} using protocol @var{proto}. If it can't find +such a service, it returns a null pointer. + +This function is useful for servers as well as for clients; servers +use it to determine which port they should listen on (@pxref{Listening}). 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct servent *} getservbyport (int @var{port}, const char *@var{proto}) +@safety{@prelim{}@mtunsafe{@mtasurace{:servbyport} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getservbyport =~ getservbyname @mtasurace:servbyport @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getservbyport_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getservbyport_r =~ getservbyname_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getservbyport_r @ascuheap @acsfd @acsmem +@c nscd_getserv_r dup @ascuheap @acsfd @acsmem +@c nss_services_lookup2 =~ nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getservbyport_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getservbyport} function returns information about the +service at port @var{port} using protocol @var{proto}. If it can't +find such a service, it returns a null pointer. +@end deftypefun + +@noindent +You can also scan the services database using @code{setservent}, +@code{getservent} and @code{endservent}. Be careful when using these +functions because they are not reentrant. 
+ +@comment netdb.h +@comment BSD +@deftypefun void setservent (int @var{stayopen}) +@safety{@prelim{}@mtunsafe{@mtasurace{:servent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c setservent @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_setent(nss_services_lookup2) @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_services_lookup2) @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *lookup_fct = nss_services_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:servent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock dup @aculock +This function opens the services database to begin scanning it. + +If the @var{stayopen} argument is nonzero, this sets a flag so that +subsequent calls to @code{getservbyname} or @code{getservbyport} will +not close the database (as they usually would). This makes for more +efficiency if you call those functions several times, by avoiding +reopening the database for each call.
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct servent *} getservent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:servent} @mtasurace{:serventbuf} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getservent @mtasurace:servent @mtasurace:serventbuf @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent(getservent_r) @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c malloc dup @ascuheap @acsmem +@c *func = getservent_r dup @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getservent_r @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent_r(nss_services_lookup2) @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_services_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:servent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *sfct.f @mtasurace:servent @ascuplugin +@c libc_lock_unlock dup @aculock +This function returns the next entry in the services database. If +there are no more entries, it returns a null pointer. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun void endservent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:servent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endservent @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c nss_endent(nss_services_lookup2) @mtasurace:servent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_services_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:servent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock @aculock +This function closes the services database. +@end deftypefun + +@node Byte Order +@subsection Byte Order Conversion +@cindex byte order conversion, for socket +@cindex converting byte order + +@cindex big-endian +@cindex little-endian +Different kinds of computers use different conventions for the +ordering of bytes within a word. Some computers put the most +significant byte within a word first (this is called ``big-endian'' +order), and others put it last (``little-endian'' order). + +@cindex network byte order +So that machines with different byte order conventions can +communicate, the Internet protocols specify a canonical byte order +convention for data transmitted over the network. This is known +as @dfn{network byte order}. + +When establishing an Internet socket connection, you must make sure that +the data in the @code{sin_port} and @code{sin_addr} members of the +@code{sockaddr_in} structure are represented in network byte order. +If you are encoding integer data in the messages sent through the +socket, you should convert this to network byte order too. 
If you don't +do this, your program may fail when running on or talking to other kinds +of machines. + +If you use @code{getservbyname} and @code{gethostbyname} or +@code{inet_addr} to get the port number and host address, the values are +already in network byte order, and you can copy them directly into +the @code{sockaddr_in} structure. + +Otherwise, you have to convert the values explicitly. Use @code{htons} +and @code{ntohs} to convert values for the @code{sin_port} member. Use +@code{htonl} and @code{ntohl} to convert IPv4 addresses for the +@code{sin_addr} member. (Remember, @code{struct in_addr} is equivalent +to @code{uint32_t}.) These functions are declared in +@file{netinet/in.h}. +@pindex netinet/in.h + +@comment netinet/in.h +@comment BSD +@deftypefun {uint16_t} htons (uint16_t @var{hostshort}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c htons ok +@c bswap_16 ok +@c bswap_constant_16 ok + +This function converts the @code{uint16_t} integer @var{hostshort} from +host byte order to network byte order. +@end deftypefun + +@comment netinet/in.h +@comment BSD +@deftypefun {uint16_t} ntohs (uint16_t @var{netshort}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Alias to htons. +This function converts the @code{uint16_t} integer @var{netshort} from +network byte order to host byte order. +@end deftypefun + +@comment netinet/in.h +@comment BSD +@deftypefun {uint32_t} htonl (uint32_t @var{hostlong}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c htonl ok +@c bswap_32 dup ok +This function converts the @code{uint32_t} integer @var{hostlong} from +host byte order to network byte order. + +This is used for IPv4 Internet addresses. +@end deftypefun + +@comment netinet/in.h +@comment BSD +@deftypefun {uint32_t} ntohl (uint32_t @var{netlong}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Alias to htonl. +This function converts the @code{uint32_t} integer @var{netlong} from +network byte order to host byte order. 
+ +This is used for IPv4 Internet addresses. +@end deftypefun + +@node Protocols Database +@subsection Protocols Database +@cindex protocols database + +The communications protocol used with a socket controls low-level +details of how data are exchanged. For example, the protocol implements +things like checksums to detect errors in transmissions, and routing +instructions for messages. Normal user programs have little reason to +mess with these details directly. + +@cindex TCP (Internet protocol) +The default communications protocol for the Internet namespace depends on +the communication style. For stream communication, the default is TCP +(``transmission control protocol''). For datagram communication, the +default is UDP (``user datagram protocol''). For reliable datagram +communication, the default is RDP (``reliable datagram protocol''). +You should nearly always use the default. + +@pindex /etc/protocols +Internet protocols are generally specified by a name instead of a +number. The network protocols that a host knows about are stored in a +database. This is usually either derived from the file +@file{/etc/protocols}, or it may be an equivalent provided by a name +server. You look up the protocol number associated with a named +protocol in the database using the @code{getprotobyname} function. + +Here are detailed descriptions of the utilities for accessing the +protocols database. These are declared in @file{netdb.h}. +@pindex netdb.h + +@comment netdb.h +@comment BSD +@deftp {Data Type} {struct protoent} +This data type is used to represent entries in the network protocols +database. It has the following members: + +@table @code +@item char *p_name +This is the official name of the protocol. + +@item char **p_aliases +These are alternate names for the protocol, specified as an array of +strings. The last element of the array is a null pointer. 
+ +@item int p_proto +This is the protocol number (in host byte order); use this member as the +@var{protocol} argument to @code{socket}. +@end table +@end deftp + +You can use @code{getprotobyname} and @code{getprotobynumber} to search +the protocols database for a specific protocol. The information is +returned in a statically-allocated structure; you must copy the +information if you need to save it across calls. + +@comment netdb.h +@comment BSD +@deftypefun {struct protoent *} getprotobyname (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:protobyname} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getprotobyname =~ getpwuid @mtasurace:protobyname @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getprotobyname_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getprotobyname_r =~ getpwuid_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c no nscd support +@c nss_protocols_lookup2 =~ nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getprotobyname_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getprotobyname} function returns information about the +network protocol named @var{name}. If there is no such protocol, it +returns a null pointer. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct protoent *} getprotobynumber (int @var{protocol}) +@safety{@prelim{}@mtunsafe{@mtasurace{:protobynumber} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getprotobynumber =~ getpwuid @mtasurace:protobynumber @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getprotobynumber_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getprotobynumber_r =~ getpwuid_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c no nscd support +@c nss_protocols_lookup2 =~ nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getprotobynumber_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getprotobynumber} function returns information about the +network protocol with number @var{protocol}. If there is no such +protocol, it returns a null pointer. +@end deftypefun + +You can also scan the whole protocols database one protocol at a time by +using @code{setprotoent}, @code{getprotoent} and @code{endprotoent}. +Be careful when using these functions because they are not reentrant. 
+ +@comment netdb.h +@comment BSD +@deftypefun void setprotoent (int @var{stayopen}) +@safety{@prelim{}@mtunsafe{@mtasurace{:protoent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c setprotoent @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_setent(nss_protocols_lookup2) @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_protocols_lookup2) @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *lookup_fct = nss_protocols_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:protoent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock dup @aculock +This function opens the protocols database to begin scanning it. + +If the @var{stayopen} argument is nonzero, this sets a flag so that +subsequent calls to @code{getprotobyname} or @code{getprotobynumber} will +not close the database (as they usually would). This makes for more +efficiency if you call those functions several times, by avoiding +reopening the database for each call. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct protoent *} getprotoent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:protoent} @mtasurace{:protoentbuf} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getprotoent @mtasurace:protoent @mtasurace:protoentbuf @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent(getprotoent_r) @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c malloc dup @ascuheap @acsmem +@c *func = getprotoent_r dup @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getprotoent_r @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent_r(nss_protocols_lookup2) @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_protocols_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:protoent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *sfct.f @mtasurace:protoent @ascuplugin +@c libc_lock_unlock dup @aculock +This function returns the next entry in the protocols database. It +returns a null pointer if there are no more entries. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun void endprotoent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:protoent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endprotoent @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c nss_endent(nss_protocols_lookup2) @mtasurace:protoent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_protocols_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:protoent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock @aculock +This function closes the protocols database. +@end deftypefun + +@node Inet Example +@subsection Internet Socket Example + +Here is an example showing how to create and name a socket in the +Internet namespace. The newly created socket exists on the machine that +the program is running on. Rather than finding and using the machine's +Internet address, this example specifies @code{INADDR_ANY} as the host +address; the system replaces that with the machine's actual address. + +@smallexample +@include mkisock.c.texi +@end smallexample + +Here is another example, showing how you can fill in a @code{sockaddr_in} +structure, given a host name string and a port number: + +@smallexample +@include isockad.c.texi +@end smallexample + +@node Misc Namespaces +@section Other Namespaces + +@vindex PF_NS +@vindex PF_ISO +@vindex PF_CCITT +@vindex PF_IMPLINK +@vindex PF_ROUTE +Certain other namespaces and associated protocol families are supported +but not documented yet because they are not often used. @code{PF_NS} +refers to the Xerox Network Software protocols. @code{PF_ISO} stands +for Open Systems Interconnect. 
@code{PF_CCITT} refers to protocols from +CCITT. @file{socket.h} defines these symbols and others naming protocols +not actually implemented. + +@code{PF_IMPLINK} is used for communicating between hosts and Interface +Message Processors. For information on this and @code{PF_ROUTE}, an +occasionally-used local area routing protocol, see the GNU Hurd Manual +(to appear in the future). + +@node Open/Close Sockets +@section Opening and Closing Sockets + +This section describes the actual library functions for opening and +closing sockets. The same functions work for all namespaces and +connection styles. + +@menu +* Creating a Socket:: How to open a socket. +* Closing a Socket:: How to close a socket. +* Socket Pairs:: These are created like pipes. +@end menu + +@node Creating a Socket +@subsection Creating a Socket +@cindex creating a socket +@cindex socket, creating +@cindex opening a socket + +The primitive for creating a socket is the @code{socket} function, +declared in @file{sys/socket.h}. +@pindex sys/socket.h + +@comment sys/socket.h +@comment BSD +@deftypefun int socket (int @var{namespace}, int @var{style}, int @var{protocol}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +This function creates a socket and specifies communication style +@var{style}, which should be one of the socket styles listed in +@ref{Communication Styles}. The @var{namespace} argument specifies +the namespace; it must be @code{PF_LOCAL} (@pxref{Local Namespace}) or +@code{PF_INET} (@pxref{Internet Namespace}). @var{protocol} +designates the specific protocol (@pxref{Socket Concepts}); zero is +usually right for @var{protocol}. + +The return value from @code{socket} is the file descriptor for the new +socket, or @code{-1} in case of error. The following @code{errno} error +conditions are defined for this function: + +@table @code +@item EPROTONOSUPPORT +The @var{protocol} or @var{style} is not supported by the +@var{namespace} specified. 
+ +@item EMFILE +The process already has too many file descriptors open. + +@item ENFILE +The system already has too many file descriptors open. + +@item EACCES +The process does not have the privilege to create a socket of the specified +@var{style} or @var{protocol}. + +@item ENOBUFS +The system ran out of internal buffer space. +@end table + +The file descriptor returned by the @code{socket} function supports both +read and write operations. However, like pipes, sockets do not support file +positioning operations. +@end deftypefun + +For examples of how to call the @code{socket} function, +see @ref{Local Socket Example}, or @ref{Inet Example}. + + +@node Closing a Socket +@subsection Closing a Socket +@cindex socket, closing +@cindex closing a socket +@cindex shutting down a socket +@cindex socket shutdown + +When you have finished using a socket, you can simply close its +file descriptor with @code{close}; see @ref{Opening and Closing Files}. +If there is still data waiting to be transmitted over the connection, +normally @code{close} tries to complete this transmission. You +can control this behavior using the @code{SO_LINGER} socket option to +specify a timeout period; see @ref{Socket Options}. + +@pindex sys/socket.h +You can also shut down only reception or transmission on a +connection by calling @code{shutdown}, which is declared in +@file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftypefun int shutdown (int @var{socket}, int @var{how}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{shutdown} function shuts down the connection of socket +@var{socket}. The argument @var{how} specifies what action to +perform: + +@table @code +@item 0 +Stop receiving data for this socket. If further data arrives, +reject it. + +@item 1 +Stop trying to transmit data from this socket. Discard any data +waiting to be sent. Stop looking for acknowledgement of data already +sent; don't retransmit it if it is lost. 
+ +@item 2 +Stop both reception and transmission. +@end table + +The return value is @code{0} on success and @code{-1} on failure. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +@var{socket} is not a valid file descriptor. + +@item ENOTSOCK +@var{socket} is not a socket. + +@item ENOTCONN +@var{socket} is not connected. +@end table +@end deftypefun + +@node Socket Pairs +@subsection Socket Pairs +@cindex creating a socket pair +@cindex socket pair +@cindex opening a socket pair + +@pindex sys/socket.h +A @dfn{socket pair} consists of a pair of connected (but unnamed) +sockets. It is very similar to a pipe and is used in much the same +way. Socket pairs are created with the @code{socketpair} function, +declared in @file{sys/socket.h}. A socket pair is much like a pipe; the +main difference is that the socket pair is bidirectional, whereas the +pipe has one input-only end and one output-only end (@pxref{Pipes and +FIFOs}). + +@comment sys/socket.h +@comment BSD +@deftypefun int socketpair (int @var{namespace}, int @var{style}, int @var{protocol}, int @var{filedes}@t{[2]}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +This function creates a socket pair, returning the file descriptors in +@code{@var{filedes}[0]} and @code{@var{filedes}[1]}. The socket pair +is a full-duplex communications channel, so that both reading and writing +may be performed at either end. + +The @var{namespace}, @var{style} and @var{protocol} arguments are +interpreted as for the @code{socket} function. @var{style} should be +one of the communication styles listed in @ref{Communication Styles}. +The @var{namespace} argument specifies the namespace, which must be +@code{AF_LOCAL} (@pxref{Local Namespace}); @var{protocol} specifies the +communications protocol, but zero is the only meaningful value. 
+ +If @var{style} specifies a connectionless communication style, then +the two sockets you get are not @emph{connected}, strictly speaking, +but each of them knows the other as the default destination address, +so they can send packets to each other. + +The @code{socketpair} function returns @code{0} on success and @code{-1} +on failure. The following @code{errno} error conditions are defined +for this function: + +@table @code +@item EMFILE +The process has too many file descriptors open. + +@item EAFNOSUPPORT +The specified namespace is not supported. + +@item EPROTONOSUPPORT +The specified protocol is not supported. + +@item EOPNOTSUPP +The specified protocol does not support the creation of socket pairs. +@end table +@end deftypefun + +@node Connections +@section Using Sockets with Connections + +@cindex connection +@cindex client +@cindex server +The most common communication styles involve making a connection to a +particular other socket, and then exchanging data with that socket +over and over. Making a connection is asymmetric; one side (the +@dfn{client}) acts to request a connection, while the other side (the +@dfn{server}) makes a socket and waits for the connection request. + +@iftex +@itemize @bullet +@item +@ref{Connecting}, describes what the client program must do to +initiate a connection with a server. + +@item +@ref{Listening} and @ref{Accepting Connections} describe what the +server program must do to wait for and act upon connection requests +from clients. + +@item +@ref{Transferring Data}, describes how data are transferred through the +connected socket. +@end itemize +@end iftex + +@menu +* Connecting:: What the client program must do. +* Listening:: How a server program waits for requests. +* Accepting Connections:: What the server does when it gets a request. +* Who is Connected:: Getting the address of the + other side of a connection. +* Transferring Data:: How to send and receive data. 
+* Byte Stream Example:: An example program: a client for communicating + over a byte stream socket in the Internet namespace. +* Server Example:: A corresponding server program. +* Out-of-Band Data:: This is an advanced feature. +@end menu + +@node Connecting +@subsection Making a Connection +@cindex connecting a socket +@cindex socket, connecting +@cindex socket, initiating a connection +@cindex socket, client actions + +In making a connection, the client initiates the connection while the server +waits for and accepts it. Here we discuss what the client +program must do with the @code{connect} function, which is declared in +@file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftypefun int connect (int @var{socket}, struct sockaddr *@var{addr}, socklen_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{connect} function initiates a connection from the socket +with file descriptor @var{socket} to the socket whose address is +specified by the @var{addr} and @var{length} arguments. (This socket +is typically on another machine, and it must be already set up as a +server.) @xref{Socket Addresses}, for information about how these +arguments are interpreted. + +Normally, @code{connect} waits until the server responds to the request +before it returns. You can set nonblocking mode on the socket +@var{socket} to make @code{connect} return immediately without waiting +for the response. @xref{File Status Flags}, for information about +nonblocking mode. +@c !!! how do you tell when it has finished connecting? I suspect the +@c way you do it is select for writing. + +The normal return value from @code{connect} is @code{0}. If an error +occurs, @code{connect} returns @code{-1}. The following @code{errno} +error conditions are defined for this function: + +@table @code +@item EBADF +The socket @var{socket} is not a valid file descriptor. + +@item ENOTSOCK +File descriptor @var{socket} is not a socket. 
+ +@item EADDRNOTAVAIL +The specified address is not available on the remote machine. + +@item EAFNOSUPPORT +The namespace of the @var{addr} is not supported by this socket. + +@item EISCONN +The socket @var{socket} is already connected. + +@item ETIMEDOUT +The attempt to establish the connection timed out. + +@item ECONNREFUSED +The server has actively refused to establish the connection. + +@item ENETUNREACH +The network of the given @var{addr} isn't reachable from this host. + +@item EADDRINUSE +The socket address of the given @var{addr} is already in use. + +@item EINPROGRESS +The socket @var{socket} is non-blocking and the connection could not be +established immediately. You can determine when the connection is +completely established with @code{select}; @pxref{Waiting for I/O}. +Another @code{connect} call on the same socket, before the connection is +completely established, will fail with @code{EALREADY}. + +@item EALREADY +The socket @var{socket} is non-blocking and already has a pending +connection in progress (see @code{EINPROGRESS} above). +@end table + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +@node Listening +@subsection Listening for Connections +@cindex listening (sockets) +@cindex sockets, server actions +@cindex sockets, listening + +Now let us consider what the server process must do to accept +connections on a socket. First it must use the @code{listen} function +to enable connection requests on the socket, and then accept each +incoming connection with a call to @code{accept} (@pxref{Accepting +Connections}). 
Once connection requests are enabled on a server socket, +the @code{select} function reports when the socket has a connection +ready to be accepted (@pxref{Waiting for I/O}). + +The @code{listen} function is not allowed for sockets using +connectionless communication styles. + +You can write a network server that does not even start running until a +connection to it is requested. @xref{Inetd Servers}. + +In the Internet namespace, there are no special protection mechanisms +for controlling access to a port; any process on any machine +can make a connection to your server. If you want to restrict access to +your server, make it examine the addresses associated with connection +requests or implement some other handshaking or identification +protocol. + +In the local namespace, the ordinary file protection bits control who has +access to connect to the socket. + +@comment sys/socket.h +@comment BSD +@deftypefun int listen (int @var{socket}, int @var{n}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +The @code{listen} function enables the socket @var{socket} to accept +connections, thus making it a server socket. + +The argument @var{n} specifies the length of the queue for pending +connections. When the queue fills, new clients attempting to connect +fail with @code{ECONNREFUSED} until the server calls @code{accept} to +accept a connection from the queue. + +The @code{listen} function returns @code{0} on success and @code{-1} +on failure. The following @code{errno} error conditions are defined +for this function: + +@table @code +@item EBADF +The argument @var{socket} is not a valid file descriptor. + +@item ENOTSOCK +The argument @var{socket} is not a socket. + +@item EOPNOTSUPP +The socket @var{socket} does not support this operation. 
+@end table +@end deftypefun + +@node Accepting Connections +@subsection Accepting Connections +@cindex sockets, accepting connections +@cindex accepting connections + +When a server receives a connection request, it can complete the +connection by accepting the request. Use the function @code{accept} +to do this. + +A socket that has been established as a server can accept connection +requests from multiple clients. The server's original socket +@emph{does not become part of the connection}; instead, @code{accept} +makes a new socket which participates in the connection. +@code{accept} returns the descriptor for this socket. The server's +original socket remains available for listening for further connection +requests. + +The number of pending connection requests on a server socket is finite. +If connection requests arrive from clients faster than the server can +act upon them, the queue can fill up and additional requests are refused +with an @code{ECONNREFUSED} error. You can specify the maximum length of +this queue as an argument to the @code{listen} function, although the +system may also impose its own internal limit on the length of this +queue. + +@comment sys/socket.h +@comment BSD +@deftypefun int accept (int @var{socket}, struct sockaddr *@var{addr}, socklen_t *@var{length-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +This function is used to accept a connection request on the server +socket @var{socket}. + +The @code{accept} function waits if there are no connections pending, +unless the socket @var{socket} has nonblocking mode set. (You can use +@code{select} to wait for a pending connection, with a nonblocking +socket.) @xref{File Status Flags}, for information about nonblocking +mode. + +The @var{addr} and @var{length-ptr} arguments are used to return +information about the name of the client socket that initiated the +connection. @xref{Socket Addresses}, for information about the format +of the information. 
+ +Accepting a connection does not make @var{socket} part of the +connection. Instead, it creates a new socket which becomes +connected. The normal return value of @code{accept} is the file +descriptor for the new socket. + +After @code{accept}, the original socket @var{socket} remains open and +unconnected, and continues listening until you close it. You can +accept further connections with @var{socket} by calling @code{accept} +again. + +If an error occurs, @code{accept} returns @code{-1}. The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{socket} argument is not a valid file descriptor. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. + +@item EOPNOTSUPP +The descriptor @var{socket} does not support this operation. + +@item EWOULDBLOCK +@var{socket} has nonblocking mode set, and there are no pending +connections immediately available. +@end table + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +The @code{accept} function is not allowed for sockets using +connectionless communication styles. + +@node Who is Connected +@subsection Who is Connected to Me? + +@comment sys/socket.h +@comment BSD +@deftypefun int getpeername (int @var{socket}, struct sockaddr *@var{addr}, socklen_t *@var{length-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getpeername} function returns the address of the socket that +@var{socket} is connected to; it stores the address in the memory space +specified by @var{addr} and @var{length-ptr}. It stores the length of +the address in @code{*@var{length-ptr}}. + +@xref{Socket Addresses}, for information about the format of the +address. 
In some operating systems, @code{getpeername} works only for +sockets in the Internet domain. + +The return value is @code{0} on success and @code{-1} on error. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The argument @var{socket} is not a valid file descriptor. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. + +@item ENOTCONN +The socket @var{socket} is not connected. + +@item ENOBUFS +There are not enough internal buffers available. +@end table +@end deftypefun + + +@node Transferring Data +@subsection Transferring Data +@cindex reading from a socket +@cindex writing to a socket + +Once a socket has been connected to a peer, you can use the ordinary +@code{read} and @code{write} operations (@pxref{I/O Primitives}) to +transfer data. A socket is a two-way communications channel, so read +and write operations can be performed at either end. + +There are also some I/O modes that are specific to socket operations. +In order to specify these modes, you must use the @code{recv} and +@code{send} functions instead of the more generic @code{read} and +@code{write} functions. The @code{recv} and @code{send} functions take +an additional argument which you can use to specify various flags to +control special I/O modes. For example, you can specify the +@code{MSG_OOB} flag to read or write out-of-band data, the +@code{MSG_PEEK} flag to peek at input, or the @code{MSG_DONTROUTE} flag +to control inclusion of routing information on output. + +@menu +* Sending Data:: Sending data with @code{send}. +* Receiving Data:: Reading data with @code{recv}. +* Socket Data Options:: Using @code{send} and @code{recv}. +@end menu + +@node Sending Data +@subsubsection Sending Data + +@pindex sys/socket.h +The @code{send} function is declared in the header file +@file{sys/socket.h}. If your @var{flags} argument is zero, you can just +as well use @code{write} instead of @code{send}; see @ref{I/O +Primitives}. 
If the socket was connected but the connection has broken, +you get a @code{SIGPIPE} signal for any use of @code{send} or +@code{write} (@pxref{Miscellaneous Signals}). + +@comment sys/socket.h +@comment BSD +@deftypefun ssize_t send (int @var{socket}, const void *@var{buffer}, size_t @var{size}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{send} function is like @code{write}, but with the additional +flags @var{flags}. The possible values of @var{flags} are described +in @ref{Socket Data Options}. + +This function returns the number of bytes transmitted, or @code{-1} on +failure. If the socket is nonblocking, then @code{send} (like +@code{write}) can return after sending just part of the data. +@xref{File Status Flags}, for information about nonblocking mode. + +Note, however, that a successful return value merely indicates that +the message has been sent without error, not necessarily that it has +been received without error. + +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{socket} argument is not a valid file descriptor. + +@item EINTR +The operation was interrupted by a signal before any data was sent. +@xref{Interrupted Primitives}. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. + +@item EMSGSIZE +The socket type requires that the message be sent atomically, but the +message is too large for this to be possible. + +@item EWOULDBLOCK +Nonblocking mode has been set on the socket, and the write operation +would block. (Normally @code{send} blocks until the operation can be +completed.) + +@item ENOBUFS +There is not enough internal buffer space available. + +@item ENOTCONN +You never connected this socket. + +@item EPIPE +This socket was connected but the connection is now broken. 
In this +case, @code{send} generates a @code{SIGPIPE} signal first; if that +signal is ignored or blocked, or if its handler returns, then +@code{send} fails with @code{EPIPE}. +@end table + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +@node Receiving Data +@subsubsection Receiving Data + +@pindex sys/socket.h +The @code{recv} function is declared in the header file +@file{sys/socket.h}. If your @var{flags} argument is zero, you can +just as well use @code{read} instead of @code{recv}; see @ref{I/O +Primitives}. + +@comment sys/socket.h +@comment BSD +@deftypefun ssize_t recv (int @var{socket}, void *@var{buffer}, size_t @var{size}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{recv} function is like @code{read}, but with the additional +flags @var{flags}. The possible values of @var{flags} are described +in @ref{Socket Data Options}. + +If nonblocking mode is set for @var{socket}, and no data are available to +be read, @code{recv} fails immediately rather than waiting. @xref{File +Status Flags}, for information about nonblocking mode. + +This function returns the number of bytes received, or @code{-1} on failure. +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{socket} argument is not a valid file descriptor. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. + +@item EWOULDBLOCK +Nonblocking mode has been set on the socket, and the read operation +would block. (Normally, @code{recv} blocks until there is input +available to be read.) + +@item EINTR +The operation was interrupted by a signal before any data was read. +@xref{Interrupted Primitives}. 
+ +@item ENOTCONN +You never connected this socket. +@end table + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +@node Socket Data Options +@subsubsection Socket Data Options + +@pindex sys/socket.h +The @var{flags} argument to @code{send} and @code{recv} is a bit +mask. You can bitwise-OR the values of the following macros together +to obtain a value for this argument. All are defined in the header +file @file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int MSG_OOB +Send or receive out-of-band data. @xref{Out-of-Band Data}. +@end deftypevr + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int MSG_PEEK +Look at the data but don't remove it from the input queue. This is +only meaningful with input functions such as @code{recv}, not with +@code{send}. +@end deftypevr + +@comment sys/socket.h +@comment BSD +@deftypevr Macro int MSG_DONTROUTE +Don't include routing information in the message. This is only +meaningful with output operations, and is usually only of interest for +diagnostic or routing programs. We don't try to explain it here. +@end deftypevr + +@node Byte Stream Example +@subsection Byte Stream Socket Example + +Here is an example client program that makes a connection for a byte +stream socket in the Internet namespace. It doesn't do anything +particularly interesting once it has connected to the server; it just +sends a text string to the server and exits. + +This program uses @code{init_sockaddr} to set up the socket address; see +@ref{Inet Example}. + +@smallexample +@include inetcli.c.texi +@end smallexample + +@node Server Example +@subsection Byte Stream Connection Server Example + +The server end is much more complicated. 
Since we want to allow +multiple clients to be connected to the server at the same time, it +would be incorrect to wait for input from a single client by simply +calling @code{read} or @code{recv}. Instead, the right thing to do is +to use @code{select} (@pxref{Waiting for I/O}) to wait for input on +all of the open sockets. This also allows the server to deal with +additional connection requests. + +This particular server doesn't do anything interesting once it has +gotten a message from a client. It does close the socket for that +client when it detects an end-of-file condition (resulting from the +client shutting down its end of the connection). + +This program uses @code{make_socket} to set up the socket address; see +@ref{Inet Example}. + +@smallexample +@include inetsrv.c.texi +@end smallexample + +@node Out-of-Band Data +@subsection Out-of-Band Data + +@cindex out-of-band data +@cindex high-priority data +Streams with connections permit @dfn{out-of-band} data that is +delivered with higher priority than ordinary data. Typically the +reason for sending out-of-band data is to send notice of an +exceptional condition. To send out-of-band data use +@code{send}, specifying the flag @code{MSG_OOB} (@pxref{Sending +Data}). + +Out-of-band data are received with higher priority because the +receiving process need not read it in sequence; to read the next +available out-of-band data, use @code{recv} with the @code{MSG_OOB} +flag (@pxref{Receiving Data}). Ordinary read operations do not read +out-of-band data; they read only ordinary data. + +@cindex urgent socket condition +When a socket finds that out-of-band data are on their way, it sends a +@code{SIGURG} signal to the owner process or process group of the +socket. You can specify the owner using the @code{F_SETOWN} command +to the @code{fcntl} function; see @ref{Interrupt Input}. 
You must +also establish a handler for this signal, as described in @ref{Signal +Handling}, in order to take appropriate action such as reading the +out-of-band data. + +Alternatively, you can test for pending out-of-band data, or wait +until there is out-of-band data, using the @code{select} function; it +can wait for an exceptional condition on the socket. @xref{Waiting +for I/O}, for more information about @code{select}. + +Notification of out-of-band data (whether with @code{SIGURG} or with +@code{select}) indicates that out-of-band data are on the way; the data +may not actually arrive until later. If you try to read the +out-of-band data before it arrives, @code{recv} fails with an +@code{EWOULDBLOCK} error. + +Sending out-of-band data automatically places a ``mark'' in the stream +of ordinary data, showing where in the sequence the out-of-band data +``would have been''. This is useful when the meaning of out-of-band +data is ``cancel everything sent so far''. Here is how you can test, +in the receiving process, whether any ordinary data was sent before +the mark: + +@smallexample +success = ioctl (socket, SIOCATMARK, &atmark); +@end smallexample + +The @code{int} variable @var{atmark} is set to a nonzero value if +the socket's read pointer has reached the ``mark''. + +@c Posix 1.g specifies sockatmark for this ioctl. sockatmark is not +@c implemented yet. 
+ +Here's a function to discard any ordinary data preceding the +out-of-band mark: + +@smallexample +int +discard_until_mark (int socket) +@{ + while (1) + @{ + /* @r{This is not an arbitrary limit; any size will do.} */ + char buffer[1024]; + int atmark, success; + + /* @r{If we have reached the mark, return.} */ + success = ioctl (socket, SIOCATMARK, &atmark); + if (success < 0) + perror ("ioctl"); + if (atmark) + return 0; + + /* @r{Otherwise, read a bunch of ordinary data and discard it.} + @r{This is guaranteed not to read past the mark} + @r{if it starts before the mark.} */ + success = read (socket, buffer, sizeof buffer); + if (success < 0) + perror ("read"); + @} +@} +@end smallexample + +If you don't want to discard the ordinary data preceding the mark, you +may need to read some of it anyway, to make room in internal system +buffers for the out-of-band data. If you try to read out-of-band data +and get an @code{EWOULDBLOCK} error, try reading some ordinary data +(saving it so that you can use it when you want it) and see if that +makes room. 
Here is an example: + +@smallexample +struct buffer +@{ + char *buf; + int size; + struct buffer *next; +@}; + +/* @r{Read the out-of-band data from SOCKET and return it} + @r{as a `struct buffer', which records the address of the data} + @r{and its size.} + + @r{It may be necessary to read some ordinary data} + @r{in order to make room for the out-of-band data.} + @r{If so, the ordinary data are saved as a chain of buffers} + @r{found in the `next' field of the value.} */ + +struct buffer * +read_oob (int socket) +@{ + struct buffer *tail = 0; + struct buffer *list = 0; + + while (1) + @{ + /* @r{This is an arbitrary limit.} + @r{Does anyone know how to do this without a limit?} */ +#define BUF_SZ 1024 + char *buf = (char *) xmalloc (BUF_SZ); + int success; + int atmark; + + /* @r{Try again to read the out-of-band data.} */ + success = recv (socket, buf, BUF_SZ, MSG_OOB); + if (success >= 0) + @{ + /* @r{We got it, so return it.} */ + struct buffer *link + = (struct buffer *) xmalloc (sizeof (struct buffer)); + link->buf = buf; + link->size = success; + link->next = list; + return link; + @} + + /* @r{If we fail, see if we are at the mark.} */ + success = ioctl (socket, SIOCATMARK, &atmark); + if (success < 0) + perror ("ioctl"); + if (atmark) + @{ + /* @r{At the mark; skipping past more ordinary data cannot help.} + @r{So just wait a while.} */ + sleep (1); + continue; + @} + + /* @r{Otherwise, read a bunch of ordinary data and save it.} + @r{This is guaranteed not to read past the mark} + @r{if it starts before the mark.} */ + success = read (socket, buf, BUF_SZ); + if (success < 0) + perror ("read"); + + /* @r{Save this data in the buffer list.} */ + @{ + struct buffer *link + = (struct buffer *) xmalloc (sizeof (struct buffer)); + link->buf = buf; + link->size = success; + + /* @r{Add the new link to the end of the list.} */ + if (tail) + tail->next = link; + else + list = link; + tail = link; + @} + @} +@} +@end smallexample + +@node Datagrams +@section 
Datagram Socket Operations + +@cindex datagram socket +This section describes how to use communication styles that don't use +connections (styles @code{SOCK_DGRAM} and @code{SOCK_RDM}). Using +these styles, you group data into packets and each packet is an +independent communication. You specify the destination for each +packet individually. + +Datagram packets are like letters: you send each one independently +with its own destination address, and they may arrive in the wrong +order or not at all. + +The @code{listen} and @code{accept} functions are not allowed for +sockets using connectionless communication styles. + +@menu +* Sending Datagrams:: Sending packets on a datagram socket. +* Receiving Datagrams:: Receiving packets on a datagram socket. +* Datagram Example:: An example program: packets sent over a + datagram socket in the local namespace. +* Example Receiver:: Another program, that receives those packets. +@end menu + +@node Sending Datagrams +@subsection Sending Datagrams +@cindex sending a datagram +@cindex transmitting datagrams +@cindex datagrams, transmitting + +@pindex sys/socket.h +The normal way of sending data on a datagram socket is by using the +@code{sendto} function, declared in @file{sys/socket.h}. + +You can call @code{connect} on a datagram socket, but this only +specifies a default destination for further data transmission on the +socket. When a socket has a default destination you can use +@code{send} (@pxref{Sending Data}) or even @code{write} (@pxref{I/O +Primitives}) to send a packet there. You can cancel the default +destination by calling @code{connect} using an address format of +@code{AF_UNSPEC} in the @var{addr} argument. @xref{Connecting}, for +more information about the @code{connect} function. 
+ +@comment sys/socket.h +@comment BSD +@deftypefun ssize_t sendto (int @var{socket}, const void *@var{buffer}, size_t @var{size}, int @var{flags}, struct sockaddr *@var{addr}, socklen_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{sendto} function transmits the data in the @var{buffer} +through the socket @var{socket} to the destination address specified +by the @var{addr} and @var{length} arguments. The @var{size} argument +specifies the number of bytes to be transmitted. + +The @var{flags} are interpreted the same way as for @code{send}; see +@ref{Socket Data Options}. + +The return value and error conditions are also the same as for +@code{send}, but you cannot rely on the system to detect errors and +report them; the most common error is that the packet is lost or there +is no one at the specified address to receive it, and the operating +system on your machine usually does not know this. + +It is also possible for one call to @code{sendto} to report an error +owing to a problem related to a previous call. + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +@node Receiving Datagrams +@subsection Receiving Datagrams +@cindex receiving datagrams + +The @code{recvfrom} function reads a packet from a datagram socket and +also tells you where it was sent from. This function is declared in +@file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftypefun ssize_t recvfrom (int @var{socket}, void *@var{buffer}, size_t @var{size}, int @var{flags}, struct sockaddr *@var{addr}, socklen_t *@var{length-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{recvfrom} function reads one packet from the socket +@var{socket} into the buffer @var{buffer}. 
The @var{size} argument +specifies the maximum number of bytes to be read. + +If the packet is longer than @var{size} bytes, then you get the first +@var{size} bytes of the packet and the rest of the packet is lost. +There's no way to read the rest of the packet. Thus, when you use a +packet protocol, you must always know how long a packet to expect. + +The @var{addr} and @var{length-ptr} arguments are used to return the +address where the packet came from. @xref{Socket Addresses}. For a +socket in the local domain the address information won't be meaningful, +since you can't read the address of such a socket (@pxref{Local +Namespace}). You can specify a null pointer as the @var{addr} argument +if you are not interested in this information. + +The @var{flags} are interpreted the same way as for @code{recv} +(@pxref{Socket Data Options}). The return value and error conditions +are also the same as for @code{recv}. + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +You can use plain @code{recv} (@pxref{Receiving Data}) instead of +@code{recvfrom} if you don't need to find out who sent the packet +(either because you know where it should come from or because you +treat all possible senders alike). Even @code{read} can be used if +you don't want to specify @var{flags} (@pxref{I/O Primitives}). + +@ignore +@c sendmsg and recvmsg are like readv and writev in that they +@c use a series of buffers. It's not clear this is worth +@c supporting or that we support them. +@c !!! 
they can do more; it is hairy + +@comment sys/socket.h +@comment BSD +@deftp {Data Type} {struct msghdr} +@end deftp + +@comment sys/socket.h +@comment BSD +@deftypefun ssize_t sendmsg (int @var{socket}, const struct msghdr *@var{message}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun + +@comment sys/socket.h +@comment BSD +@deftypefun ssize_t recvmsg (int @var{socket}, struct msghdr *@var{message}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +This function is defined as a cancellation point in multi-threaded +programs, so one has to be prepared for this and make sure that +allocated resources (like memory, file descriptors, semaphores or +whatever) are freed even if the thread is canceled. +@c @xref{pthread_cleanup_push}, for a method how to do this. +@end deftypefun +@end ignore + +@node Datagram Example +@subsection Datagram Socket Example + +Here is a set of example programs that send messages over a datagram +stream in the local namespace. Both the client and server programs use +the @code{make_named_socket} function that was presented in @ref{Local +Socket Example}, to create and name their sockets. + +First, here is the server program. It sits in a loop waiting for +messages to arrive, bouncing each message back to the sender. +Obviously this isn't a particularly useful program, but it does show +the general ideas involved. + +@smallexample +@include filesrv.c.texi +@end smallexample + +@node Example Receiver +@subsection Example of Reading Datagrams + +Here is the client program corresponding to the server above. + +It sends a datagram to the server and then waits for a reply. 
Notice +that the socket for the client (as well as for the server) in this +example has to be given a name. This is so that the server can direct +a message back to the client. Since the socket has no associated +connection state, the only way the server can do this is by +referencing the name of the client. + +@smallexample +@include filecli.c.texi +@end smallexample + +Keep in mind that datagram socket communications are unreliable. In +this example, the client program waits indefinitely if the message +never reaches the server or if the server's response never comes +back. It's up to the user running the program to kill and restart +it if desired. A more automatic solution could be to use +@code{select} (@pxref{Waiting for I/O}) to establish a timeout period +for the reply, and in case of timeout either re-send the message or +shut down the socket and exit. + +@node Inetd +@section The @code{inetd} Daemon + +We've explained above how to write a server program that does its own +listening. Such a server must already be running in order for anyone +to connect to it. + +Another way to provide a service on an Internet port is to let the daemon +program @code{inetd} do the listening. @code{inetd} is a program that +runs all the time and waits (using @code{select}) for messages on a +specified set of ports. When it receives a message, it accepts the +connection (if the socket style calls for connections) and then forks a +child process to run the corresponding server program. You specify the +ports and their programs in the file @file{/etc/inetd.conf}. + +@menu +* Inetd Servers:: +* Configuring Inetd:: +@end menu + +@node Inetd Servers +@subsection @code{inetd} Servers + +Writing a server program to be run by @code{inetd} is very simple. Each time +someone requests a connection to the appropriate port, a new server +process starts. 
The connection already exists at this time; the +socket is available as the standard input descriptor and as the +standard output descriptor (descriptors 0 and 1) in the server +process. Thus the server program can begin reading and writing data +right away. Often the program needs only the ordinary I/O facilities; +in fact, a general-purpose filter program that knows nothing about +sockets can work as a byte stream server run by @code{inetd}. + +You can also use @code{inetd} for servers that use connectionless +communication styles. For these servers, @code{inetd} does not try to accept +a connection since no connection is possible. It just starts the +server program, which can read the incoming datagram packet from +descriptor 0. The server program can handle one request and then +exit, or you can choose to write it to keep reading more requests +until no more arrive, and then exit. You must specify which of these +two techniques the server uses when you configure @code{inetd}. + +@node Configuring Inetd +@subsection Configuring @code{inetd} + +The file @file{/etc/inetd.conf} tells @code{inetd} which ports to listen to +and what server programs to run for them. Normally each entry in the +file is one line, but you can split it onto multiple lines provided +all but the first line of the entry start with whitespace. Lines that +start with @samp{#} are comments. + +Here are two standard entries in @file{/etc/inetd.conf}: + +@smallexample +ftp stream tcp nowait root /libexec/ftpd ftpd +talk dgram udp wait root /libexec/talkd talkd +@end smallexample + +An entry has this format: + +@smallexample +@var{service} @var{style} @var{protocol} @var{wait} @var{username} @var{program} @var{arguments} +@end smallexample + +The @var{service} field says which service this program provides. It +should be the name of a service defined in @file{/etc/services}. +@code{inetd} uses @var{service} to decide which port to listen on for +this entry. 
+ +The fields @var{style} and @var{protocol} specify the communication +style and the protocol to use for the listening socket. The style +should be the name of a communication style, converted to lower case +and with @samp{SOCK_} deleted---for example, @samp{stream} or +@samp{dgram}. @var{protocol} should be one of the protocols listed in +@file{/etc/protocols}. The typical protocol names are @samp{tcp} for +byte stream connections and @samp{udp} for unreliable datagrams. + +The @var{wait} field should be either @samp{wait} or @samp{nowait}. +Use @samp{wait} if @var{style} is a connectionless style and the +server, once started, handles multiple requests as they come in. +Use @samp{nowait} if @code{inetd} should start a new process for each message +or request that comes in. If @var{style} uses connections, then +@var{wait} @strong{must} be @samp{nowait}. + +@var{username} is the user name that the server should run as. @code{inetd} runs +as root, so it can set the user ID of its children arbitrarily. It's +best to avoid using @samp{root} for @var{username} if you can; but some +servers, such as Telnet and FTP, read a username and password +themselves. These servers need to be root initially so they can log +in as commanded by the data coming over the network. + +@var{program} together with @var{arguments} specifies the command to +run to start the server. @var{program} should be an absolute file +name specifying the executable file to run. @var{arguments} consists +of any number of whitespace-separated words, which become the +command-line arguments of @var{program}. The first word in +@var{arguments} is argument zero, which should by convention be the +program name itself (sans directories). + +If you edit @file{/etc/inetd.conf}, you can tell @code{inetd} to reread the +file and obey its new contents by sending the @code{inetd} process the +@code{SIGHUP} signal. You'll have to use @code{ps} to determine the +process ID of the @code{inetd} process as it is not fixed. 
+ +@c !!! could document /etc/inetd.sec + +@node Socket Options +@section Socket Options +@cindex socket options + +This section describes how to read or set various options that modify +the behavior of sockets and their underlying communications protocols. + +@cindex level, for socket options +@cindex socket option level +When you are manipulating a socket option, you must specify which +@dfn{level} the option pertains to. This describes whether the option +applies to the socket interface, or to a lower-level communications +protocol interface. + +@menu +* Socket Option Functions:: The basic functions for setting and getting + socket options. +* Socket-Level Options:: Details of the options at the socket level. +@end menu + +@node Socket Option Functions +@subsection Socket Option Functions + +@pindex sys/socket.h +Here are the functions for examining and modifying socket options. +They are declared in @file{sys/socket.h}. + +@comment sys/socket.h +@comment BSD +@deftypefun int getsockopt (int @var{socket}, int @var{level}, int @var{optname}, void *@var{optval}, socklen_t *@var{optlen-ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getsockopt} function gets information about the value of +option @var{optname} at level @var{level} for socket @var{socket}. + +The option value is stored in the buffer that @var{optval} points to. +Before the call, you should supply in @code{*@var{optlen-ptr}} the +size of this buffer; on return, it contains the number of bytes of +information actually stored in the buffer. + +Most options interpret the @var{optval} buffer as a single @code{int} +value. + +The actual return value of @code{getsockopt} is @code{0} on success +and @code{-1} on failure. The following @code{errno} error conditions +are defined: + +@table @code +@item EBADF +The @var{socket} argument is not a valid file descriptor. + +@item ENOTSOCK +The descriptor @var{socket} is not a socket. 
+ +@item ENOPROTOOPT +The @var{optname} doesn't make sense for the given @var{level}. +@end table +@end deftypefun + +@comment sys/socket.h +@comment BSD +@deftypefun int setsockopt (int @var{socket}, int @var{level}, int @var{optname}, const void *@var{optval}, socklen_t @var{optlen}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is used to set the socket option @var{optname} at level +@var{level} for socket @var{socket}. The value of the option is passed +in the buffer @var{optval} of size @var{optlen}. + +@c Argh. -zw +@iftex +@hfuzz 6pt +The return value and error codes for @code{setsockopt} are the same as +for @code{getsockopt}. +@end iftex +@ifinfo +The return value and error codes for @code{setsockopt} are the same as +for @code{getsockopt}. +@end ifinfo + +@end deftypefun + +@node Socket-Level Options +@subsection Socket-Level Options + +@comment sys/socket.h +@comment BSD +@deftypevr Constant int SOL_SOCKET +Use this constant as the @var{level} argument to @code{getsockopt} or +@code{setsockopt} to manipulate the socket-level options described in +this section. +@end deftypevr + +@pindex sys/socket.h +@noindent +Here is a table of socket-level option names; all are defined in the +header file @file{sys/socket.h}. + +@vtable @code +@comment sys/socket.h +@comment BSD +@item SO_DEBUG +@c Extra blank line here makes the table look better. + +This option toggles recording of debugging information in the underlying +protocol modules. The value has type @code{int}; a nonzero value means +``yes''. +@c !!! should say how this is used +@c OK, anyone who knows, please explain. + +@comment sys/socket.h +@comment BSD +@item SO_REUSEADDR +This option controls whether @code{bind} (@pxref{Setting Address}) +should permit reuse of local addresses for this socket. 
If you enable +this option, you can actually have two sockets with the same Internet +port number; but the system won't allow you to use the two +identically-named sockets in a way that would confuse the Internet. The +reason for this option is that some higher-level Internet protocols, +including FTP, require you to keep reusing the same port number. + +The value has type @code{int}; a nonzero value means ``yes''. + +@comment sys/socket.h +@comment BSD +@item SO_KEEPALIVE +This option controls whether the underlying protocol should +periodically transmit messages on a connected socket. If the peer +fails to respond to these messages, the connection is considered +broken. The value has type @code{int}; a nonzero value means +``yes''. + +@comment sys/socket.h +@comment BSD +@item SO_DONTROUTE +This option controls whether outgoing messages bypass the normal +message routing facilities. If set, messages are sent directly to the +network interface instead. The value has type @code{int}; a nonzero +value means ``yes''. + +@comment sys/socket.h +@comment BSD +@item SO_LINGER +This option specifies what should happen when the socket of a type +that promises reliable delivery still has untransmitted messages when +it is closed; see @ref{Closing a Socket}. The value has type +@code{struct linger}. + +@comment sys/socket.h +@comment BSD +@deftp {Data Type} {struct linger} +This structure type has the following members: + +@table @code +@item int l_onoff +This field is interpreted as a boolean. If nonzero, @code{close} +blocks until the data are transmitted or the timeout period has expired. + +@item int l_linger +This specifies the timeout period, in seconds. +@end table +@end deftp + +@comment sys/socket.h +@comment BSD +@item SO_BROADCAST +This option controls whether datagrams may be broadcast from the socket. +The value has type @code{int}; a nonzero value means ``yes''. 
+ +@comment sys/socket.h +@comment BSD +@item SO_OOBINLINE +If this option is set, out-of-band data received on the socket is +placed in the normal input queue. This permits it to be read using +@code{read} or @code{recv} without specifying the @code{MSG_OOB} +flag. @xref{Out-of-Band Data}. The value has type @code{int}; a +nonzero value means ``yes''. + +@comment sys/socket.h +@comment BSD +@item SO_SNDBUF +This option gets or sets the size of the output buffer. The value is a +@code{size_t}, which is the size in bytes. + +@comment sys/socket.h +@comment BSD +@item SO_RCVBUF +This option gets or sets the size of the input buffer. The value is a +@code{size_t}, which is the size in bytes. + +@comment sys/socket.h +@comment GNU +@item SO_STYLE +@comment sys/socket.h +@comment BSD +@itemx SO_TYPE +This option can be used with @code{getsockopt} only. It is used to +get the socket's communication style. @code{SO_TYPE} is the +historical name, and @code{SO_STYLE} is the preferred name in GNU. +The value has type @code{int} and its value designates a communication +style; see @ref{Communication Styles}. + +@comment sys/socket.h +@comment BSD +@item SO_ERROR +@c Extra blank line here makes the table look better. + +This option can be used with @code{getsockopt} only. It is used to reset +the error status of the socket. The value is an @code{int}, which represents +the previous error status. +@c !!! what is "socket error status"? this is never defined. +@end vtable + +@node Networks Database +@section Networks Database +@cindex networks database +@cindex converting network number to network name +@cindex converting network name to network number + +@pindex /etc/networks +@pindex netdb.h +Many systems come with a database that records a list of networks known +to the system developer. This is usually kept either in the file +@file{/etc/networks} or in an equivalent from a name server. 
This data +base is useful for routing programs such as @code{route}, but it is not +useful for programs that simply communicate over the network. We +provide functions to access this database, which are declared in +@file{netdb.h}. + +@comment netdb.h +@comment BSD +@deftp {Data Type} {struct netent} +This data type is used to represent information about entries in the +networks database. It has the following members: + +@table @code +@item char *n_name +This is the ``official'' name of the network. + +@item char **n_aliases +These are alternative names for the network, represented as a vector +of strings. A null pointer terminates the array. + +@item int n_addrtype +This is the type of the network number; this is always equal to +@code{AF_INET} for Internet networks. + +@item unsigned long int n_net +This is the network number. Network numbers are returned in host +byte order; see @ref{Byte Order}. +@end table +@end deftp + +Use the @code{getnetbyname} or @code{getnetbyaddr} functions to search +the networks database for information about a specific network. The +information is returned in a statically-allocated structure; you must +copy the information if you need to save it. 
+ +@comment netdb.h +@comment BSD +@deftypefun {struct netent *} getnetbyname (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netbyname} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getnetbyname =~ getpwuid @mtasurace:netbyname @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getnetbyname_r dup @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getnetbyname_r =~ getpwuid_r @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c no nscd support +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c nss_networks_lookup2 =~ nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getnetbyname_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getnetbyname} function returns information about the network +named @var{name}. It returns a null pointer if there is no such +network. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct netent *} getnetbyaddr (uint32_t @var{net}, int @var{type}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netbyaddr} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getnetbyaddr =~ getpwuid @mtasurace:netbyaddr @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getnetbyaddr_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getnetbyaddr_r =~ getpwuid_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c no nscd support +@c nss_networks_lookup2 =~ nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getnetbyaddr_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getnetbyaddr} function returns information about the network +of type @var{type} with number @var{net}. You should specify a value of +@code{AF_INET} for the @var{type} argument for Internet networks. + +@code{getnetbyaddr} returns a null pointer if there is no such +network. +@end deftypefun + +You can also scan the networks database using @code{setnetent}, +@code{getnetent} and @code{endnetent}. Be careful when using these +functions because they are not reentrant. 
+ +@comment netdb.h +@comment BSD +@deftypefun void setnetent (int @var{stayopen}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netent} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c setnetent @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_setent(nss_networks_lookup2) @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c setup(nss_networks_lookup2) @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *lookup_fct = nss_networks_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:netent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock dup @aculock +This function opens and rewinds the networks database. + +If the @var{stayopen} argument is nonzero, this sets a flag so that +subsequent calls to @code{getnetbyname} or @code{getnetbyaddr} will +not close the database (as they usually would). This makes for more +efficiency if you call those functions several times, by avoiding +reopening the database for each call. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun {struct netent *} getnetent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:netent} @mtasurace{:netentbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getnetent @mtasurace:netent @mtasurace:netentbuf @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent(getnetent_r) @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c malloc dup @ascuheap @acsmem +@c *func = getnetent_r dup @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +@c +@c getnetent_r @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent_r(nss_networks_lookup2) @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c setup(nss_networks_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:servent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *sfct.f @mtasurace:netent @ascuplugin +@c libc_lock_unlock dup @aculock +This function returns the next entry in the networks database. It +returns a null pointer if there are no more entries. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun void endnetent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:netent} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endnetent @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c nss_endent(nss_networks_lookup2) @mtasurace:netent @mtsenv @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c res_maybe_init(!preinit) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c setup(nss_networks_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:netent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock @aculock +This function closes the networks database. +@end deftypefun diff --git a/REORG.TODO/manual/startup.texi b/REORG.TODO/manual/startup.texi new file mode 100644 index 0000000000..e4c983ada6 --- /dev/null +++ b/REORG.TODO/manual/startup.texi @@ -0,0 +1,1099 @@ +@node Program Basics, Processes, Signal Handling, Top +@c %MENU% Writing the beginning and end of your program +@chapter The Basic Program/System Interface + +@cindex process +@cindex program +@cindex address space +@cindex thread of control +@dfn{Processes} are the primitive units for allocation of system +resources. Each process has its own address space and (usually) one +thread of control. A process executes a program; you can have multiple +processes executing the same program, but each process has its own copy +of the program within its own address space and executes it +independently of the other copies. 
Though it may have multiple threads +of control within the same program and a program may be composed of +multiple logically separate modules, a process always executes exactly +one program. + +Note that we are using a specific definition of ``program'' for the +purposes of this manual, which corresponds to a common definition in the +context of Unix systems. In popular usage, ``program'' enjoys a much +broader definition; it can refer for example to a system's kernel, an +editor macro, a complex package of software, or a discrete section of +code executing within a process. + +Writing the program is what this manual is all about. This chapter +explains the most basic interface between your program and the system +that runs, or calls, it. This includes passing of parameters (arguments +and environment) from the system, requesting basic services from the +system, and telling the system the program is done. + +A program starts another program with the @code{exec} family of system calls. +This chapter looks at program startup from the execee's point of view. To +see the event from the execor's point of view, see @ref{Executing a File}. + +@menu +* Program Arguments:: Parsing your program's command-line arguments +* Environment Variables:: Less direct parameters affecting your program +* Auxiliary Vector:: Least direct parameters affecting your program +* System Calls:: Requesting service from the system +* Program Termination:: Telling the system you're done; return status +@end menu + +@node Program Arguments, Environment Variables, , Program Basics +@section Program Arguments +@cindex program arguments +@cindex command line arguments +@cindex arguments, to program + +@cindex program startup +@cindex startup of program +@cindex invocation of program +@cindex @code{main} function +@findex main +The system starts a C program by calling the function @code{main}. 
It +is up to you to write a function named @code{main}---otherwise, you +won't even be able to link your program without errors. + +In @w{ISO C} you can define @code{main} either to take no arguments, or to +take two arguments that represent the command line arguments to the +program, like this: + +@smallexample +int main (int @var{argc}, char *@var{argv}[]) +@end smallexample + +@cindex argc (program argument count) +@cindex argv (program argument vector) +The command line arguments are the whitespace-separated tokens given in +the shell command used to invoke the program; thus, in @samp{cat foo +bar}, the arguments are @samp{foo} and @samp{bar}. The only way a +program can look at its command line arguments is via the arguments of +@code{main}. If @code{main} doesn't take arguments, then you cannot get +at the command line. + +The value of the @var{argc} argument is the number of command line +arguments. The @var{argv} argument is a vector of C strings; its +elements are the individual command line argument strings. The file +name of the program being run is also included in the vector as the +first element; the value of @var{argc} counts this element. A null +pointer always follows the last element: @code{@var{argv}[@var{argc}]} +is this null pointer. + +For the command @samp{cat foo bar}, @var{argc} is 3 and @var{argv} has +three elements, @code{"cat"}, @code{"foo"} and @code{"bar"}. + +In Unix systems you can define @code{main} a third way, using three arguments: + +@smallexample +int main (int @var{argc}, char *@var{argv}[], char *@var{envp}[]) +@end smallexample + +The first two arguments are just the same. The third argument +@var{envp} gives the program's environment; it is the same as the value +of @code{environ}. @xref{Environment Variables}. POSIX.1 does not +allow this three-argument form, so to be portable it is best to write +@code{main} to take two arguments, and use the value of @code{environ}. 
+ +@menu +* Argument Syntax:: By convention, options start with a hyphen. +* Parsing Program Arguments:: Ways to parse program options and arguments. +@end menu + +@node Argument Syntax, Parsing Program Arguments, , Program Arguments +@subsection Program Argument Syntax Conventions +@cindex program argument syntax +@cindex syntax, for program arguments +@cindex command argument syntax + +POSIX recommends these conventions for command line arguments. +@code{getopt} (@pxref{Getopt}) and @code{argp_parse} (@pxref{Argp}) make +it easy to implement them. + +@itemize @bullet +@item +Arguments are options if they begin with a hyphen delimiter (@samp{-}). + +@item +Multiple options may follow a hyphen delimiter in a single token if +the options do not take arguments. Thus, @samp{-abc} is equivalent to +@samp{-a -b -c}. + +@item +Option names are single alphanumeric characters (as for @code{isalnum}; +@pxref{Classification of Characters}). + +@item +Certain options require an argument. For example, the @samp{-o} option +of the @code{ld} command requires an argument---an output file name. + +@item +An option and its argument may or may not appear as separate tokens. (In +other words, the whitespace separating them is optional.) Thus, +@w{@samp{-o foo}} and @samp{-ofoo} are equivalent. + +@item +Options typically precede other non-option arguments. + +The implementations of @code{getopt} and @code{argp_parse} in @theglibc{} +normally make it appear as if all the option arguments were +specified before all the non-option arguments for the purposes of +parsing, even if the user of your program intermixed option and +non-option arguments. They do this by reordering the elements of the +@var{argv} array. This behavior is nonstandard; if you want to suppress +it, define the @code{_POSIX_OPTION_ORDER} environment variable. +@xref{Standard Environment}.
+ +@item +The argument @samp{--} terminates all options; any following arguments +are treated as non-option arguments, even if they begin with a hyphen. + +@item +A token consisting of a single hyphen character is interpreted as an +ordinary non-option argument. By convention, it is used to specify +input from or output to the standard input and output streams. + +@item +Options may be supplied in any order, or appear multiple times. The +interpretation is left up to the particular application program. +@end itemize + +@cindex long-named options +GNU adds @dfn{long options} to these conventions. Long options consist +of @samp{--} followed by a name made of alphanumeric characters and +dashes. Option names are typically one to three words long, with +hyphens to separate words. Users can abbreviate the option names as +long as the abbreviations are unique. + +To specify an argument for a long option, write +@samp{--@var{name}=@var{value}}. This syntax enables a long option to +accept an argument that is itself optional. + +Eventually, @gnusystems{} will provide completion for long option names +in the shell. + +@node Parsing Program Arguments, , Argument Syntax, Program Arguments +@subsection Parsing Program Arguments + +@cindex program arguments, parsing +@cindex command arguments, parsing +@cindex parsing program arguments +If the syntax for the command line arguments to your program is simple +enough, you can simply pick the arguments off from @var{argv} by hand. +But unless your program takes a fixed number of arguments, or all of the +arguments are interpreted in the same way (as file names, for example), +you are usually better off using @code{getopt} (@pxref{Getopt}) or +@code{argp_parse} (@pxref{Argp}) to do the parsing. 
+ +@code{getopt} is more standard (the short-option only version of it is a +part of the POSIX standard), but using @code{argp_parse} is often +easier, both for very simple and very complex option structures, because +it does more of the dirty work for you. + +@menu +* Getopt:: Parsing program options using @code{getopt}. +* Argp:: Parsing program options using @code{argp_parse}. +* Suboptions:: Some programs need more detailed options. +* Suboptions Example:: This shows how it could be done for @code{mount}. +@end menu + +@c Getopt and argp start at the @section level so that there's +@c enough room for their internal hierarchy (mostly a problem with +@c argp). -Miles + +@include getopt.texi +@include argp.texi + +@node Suboptions, Suboptions Example, Argp, Parsing Program Arguments +@c This is a @section so that it's at the same level as getopt and argp +@subsubsection Parsing of Suboptions + +Having a single level of options is sometimes not enough. There might +be too many options which have to be available or a set of options is +closely related. + +For this case some programs use suboptions. One of the most prominent +programs is certainly @code{mount}(8). The @code{-o} option takes one +argument which itself is a comma-separated list of options. To ease the +programming of code like this the function @code{getsubopt} is +available. + +@comment stdlib.h +@deftypefun int getsubopt (char **@var{optionp}, char *const *@var{tokens}, char **@var{valuep}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c getsubopt ok +@c strchrnul dup ok +@c memchr dup ok +@c strncmp dup ok + +The @var{optionp} parameter must be a pointer to a variable containing +the address of the string to process. When the function returns, the +reference is updated to point to the next suboption or to the +terminating @samp{\0} character if there are no more suboptions available. + +The @var{tokens} parameter references an array of strings containing the +known suboptions.
All strings must be @samp{\0} terminated and to mark +the end a null pointer must be stored. When @code{getsubopt} finds a +possible legal suboption it compares it with all strings available in +the @var{tokens} array and returns the index of the matching string in +the array as the indicator. + +In case the suboption has an associated value introduced by a @samp{=} +character, a pointer to the value is returned in @var{valuep}. The +string is @samp{\0} terminated. If no argument is available +@var{valuep} is set to the null pointer. By doing this the caller can +check whether a necessary value is given or whether no unexpected value +is present. + +In case the next suboption in the string is not mentioned in the +@var{tokens} array the starting address of the suboption including a +possible value is returned in @var{valuep} and the return value of the +function is @samp{-1}. +@end deftypefun + +@node Suboptions Example, , Suboptions, Parsing Program Arguments +@subsection Parsing of Suboptions Example + +The code which might appear in the @code{mount}(8) program is a perfect +example of the use of @code{getsubopt}: + +@smallexample +@include subopt.c.texi +@end smallexample + + +@node Environment Variables, Auxiliary Vector, Program Arguments, Program Basics +@section Environment Variables + +@cindex environment variable +When a program is executed, it receives information about the context in +which it was invoked in two ways. The first mechanism uses the +@var{argv} and @var{argc} arguments to its @code{main} function, and is +discussed in @ref{Program Arguments}. The second mechanism uses +@dfn{environment variables} and is discussed in this section. + +The @var{argv} mechanism is typically used to pass command-line +arguments specific to the particular program being invoked. The +environment, on the other hand, keeps track of information that is +shared by many programs, changes infrequently, and that is less +frequently used.
+ +The environment variables discussed in this section are the same +environment variables that you set using assignments and the +@code{export} command in the shell. Programs executed from the shell +inherit all of the environment variables from the shell. +@c !!! xref to right part of bash manual when it exists + +@cindex environment +Standard environment variables are used for information about the user's +home directory, terminal type, current locale, and so on; you can define +additional variables for other purposes. The set of all environment +variables that have values is collectively known as the +@dfn{environment}. + +Names of environment variables are case-sensitive and must not contain +the character @samp{=}. System-defined environment variables are +invariably uppercase. + +The values of environment variables can be anything that can be +represented as a string. A value must not contain an embedded null +character, since this is assumed to terminate the string. + + +@menu +* Environment Access:: How to get and set the values of + environment variables. +* Standard Environment:: These environment variables have + standard interpretations. +@end menu + +@node Environment Access +@subsection Environment Access +@cindex environment access +@cindex environment representation + +The value of an environment variable can be accessed with the +@code{getenv} function. This is declared in the header file +@file{stdlib.h}. +@pindex stdlib.h + +Libraries should use @code{secure_getenv} instead of @code{getenv}, so +that they do not accidentally use untrusted environment variables. +Modifications of environment variables are not allowed in +multi-threaded programs. The @code{getenv} and @code{secure_getenv} +functions can be safely used in multi-threaded programs. + +@comment stdlib.h +@comment ISO +@deftypefun {char *} getenv (const char *@var{name}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@assafe{}@acsafe{}} +@c Unguarded access to __environ. 
+This function returns a string that is the value of the environment +variable @var{name}. You must not modify this string. In some non-Unix +systems not using @theglibc{}, it might be overwritten by subsequent +calls to @code{getenv} (but not by any other library function). If the +environment variable @var{name} is not defined, the value is a null +pointer. +@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun {char *} secure_getenv (const char *@var{name}) +@safety{@prelim{}@mtsafe{@mtsenv{}}@assafe{}@acsafe{}} +@c Calls getenv unless secure mode is enabled. +This function is similar to @code{getenv}, but it returns a null +pointer if the environment is untrusted. This happens when the +program file has SUID or SGID bits set. General-purpose libraries +should always prefer this function over @code{getenv} to avoid +vulnerabilities if the library is referenced from a SUID/SGID program. + +This function is a GNU extension. +@end deftypefun + + +@comment stdlib.h +@comment SVID +@deftypefun int putenv (char *@var{string}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtsenv{}}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c putenv @mtasuconst:@mtsenv @ascuheap @asulock @acucorrupt @aculock @acsmem +@c strchr dup ok +@c strndup dup @ascuheap @acsmem +@c add_to_environ dup @mtasuconst:@mtsenv @ascuheap @asulock @acucorrupt @aculock @acsmem +@c free dup @ascuheap @acsmem +@c unsetenv dup @mtasuconst:@mtsenv @asulock @aculock +The @code{putenv} function adds or removes definitions from the environment. +If the @var{string} is of the form @samp{@var{name}=@var{value}}, the +definition is added to the environment. Otherwise, the @var{string} is +interpreted as the name of an environment variable, and any definition +for this variable in the environment is removed. + +If the function is successful it returns @code{0}. Otherwise the return +value is nonzero and @code{errno} is set to indicate the error. 
+ +The difference to the @code{setenv} function is that the exact string +given as the parameter @var{string} is put into the environment. If the +user should change the string after the @code{putenv} call this will +reflect automatically in the environment. This also requires that +@var{string} not be an automatic variable whose scope is left before the +variable is removed from the environment. The same applies of course to +dynamically allocated variables which are freed later. + +This function is part of the extended Unix interface. You should define +@var{_XOPEN_SOURCE} before including any header. +@end deftypefun + + +@comment stdlib.h +@comment BSD +@deftypefun int setenv (const char *@var{name}, const char *@var{value}, int @var{replace}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtsenv{}}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c setenv @mtasuconst:@mtsenv @ascuheap @asulock @acucorrupt @aculock @acsmem +@c add_to_environ @mtasuconst:@mtsenv @ascuheap @asulock @acucorrupt @aculock @acsmem +@c strlen dup ok +@c libc_lock_lock @asulock @aculock +@c strncmp dup ok +@c realloc dup @ascuheap @acsmem +@c libc_lock_unlock @aculock +@c malloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c mempcpy dup ok +@c memcpy dup ok +@c KNOWN_VALUE ok +@c tfind(strcmp) [no @mtsrace guarded access] +@c strcmp dup ok +@c STORE_VALUE @ascuheap @acucorrupt @acsmem +@c tsearch(strcmp) @ascuheap @acucorrupt @acsmem [no @mtsrace or @asucorrupt guarded access makes for mtsafe and @asulock] +@c strcmp dup ok +The @code{setenv} function can be used to add a new definition to the +environment. The entry with the name @var{name} is replaced by the +value @samp{@var{name}=@var{value}}. Please note that this is also true +if @var{value} is the empty string. To do this a new string is created +and the strings @var{name} and @var{value} are copied. A null pointer +for the @var{value} parameter is illegal. 
If the environment already +contains an entry with key @var{name} the @var{replace} parameter +controls the action. If @var{replace} is zero, nothing happens. Otherwise +the old entry is replaced by the new one. + +Please note that you cannot remove an entry completely using this function. + +If the function is successful it returns @code{0}. Otherwise the +environment is unchanged and the return value is @code{-1} and +@code{errno} is set. + +This function was originally part of the BSD library but is now part of +the Unix standard. +@end deftypefun + +@comment stdlib.h +@comment BSD +@deftypefun int unsetenv (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtsenv{}}}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c unsetenv @mtasuconst:@mtsenv @asulock @aculock +@c strchr dup ok +@c strlen dup ok +@c libc_lock_lock @asulock @aculock +@c strncmp dup ok +@c libc_lock_unlock @aculock +Using this function one can remove an entry completely from the +environment. If the environment contains an entry with the key +@var{name} this whole entry is removed. A call to this function is +equivalent to a call to @code{putenv} when the @var{value} part of the +string is empty. + +The function returns @code{-1} if @var{name} is a null pointer, points to +an empty string, or points to a string containing a @code{=} character. +It returns @code{0} if the call succeeded. + +This function was originally part of the BSD library but is now part of +the Unix standard. The BSD version had no return value, though. +@end deftypefun + +There is one more function to modify the whole environment. This +function is said to be used in the POSIX.9 (POSIX bindings for Fortran +77) and so one would expect it to have made it into POSIX.1. But this +never happened. But we still provide this function as a GNU extension +to enable writing standard compliant Fortran environments.
+ +@comment stdlib.h +@comment GNU +@deftypefun int clearenv (void) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtsenv{}}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}} +@c clearenv @mtasuconst:@mtsenv @ascuheap @asulock @aculock @acsmem +@c libc_lock_lock @asulock @aculock +@c free dup @ascuheap @acsmem +@c libc_lock_unlock @aculock +The @code{clearenv} function removes all entries from the environment. +Using @code{putenv} and @code{setenv} new entries can be added again +later. + +If the function is successful it returns @code{0}. Otherwise the return +value is nonzero. +@end deftypefun + + +You can deal directly with the underlying representation of environment +objects to add more variables to the environment (for example, to +communicate with another program you are about to execute; +@pxref{Executing a File}). + +@comment unistd.h +@comment POSIX.1 +@deftypevar {char **} environ +The environment is represented as an array of strings. Each string is +of the format @samp{@var{name}=@var{value}}. The order in which +strings appear in the environment is not significant, but the same +@var{name} must not appear more than once. The last element of the +array is a null pointer. + +This variable is declared in the header file @file{unistd.h}. + +If you just want to get the value of an environment variable, use +@code{getenv}. +@end deftypevar + +Unix systems, and @gnusystems{}, pass the initial value of +@code{environ} as the third argument to @code{main}. +@xref{Program Arguments}. + +@node Standard Environment +@subsection Standard Environment Variables +@cindex standard environment variables + +These environment variables have standard meanings. This doesn't mean +that they are always present in the environment; but if these variables +@emph{are} present, they have these meanings. You shouldn't try to use +these environment variable names for some other purpose. + +@comment Extra blank lines make it look better. 
+@table @code +@item HOME +@cindex @code{HOME} environment variable +@cindex home directory + +This is a string representing the user's @dfn{home directory}, or +initial default working directory. + +The user can set @code{HOME} to any value. +If you need to make sure to obtain the proper home directory +for a particular user, you should not use @code{HOME}; instead, +look up the user's name in the user database (@pxref{User Database}). + +For most purposes, it is better to use @code{HOME}, precisely because +this lets the user specify the value. + +@c !!! also USER +@item LOGNAME +@cindex @code{LOGNAME} environment variable + +This is the name that the user used to log in. Since the value in the +environment can be tweaked arbitrarily, this is not a reliable way to +identify the user who is running a program; a function like +@code{getlogin} (@pxref{Who Logged In}) is better for that purpose. + +For most purposes, it is better to use @code{LOGNAME}, precisely because +this lets the user specify the value. + +@item PATH +@cindex @code{PATH} environment variable + +A @dfn{path} is a sequence of directory names which is used for +searching for a file. The variable @code{PATH} holds a path used +for searching for programs to be run. + +The @code{execlp} and @code{execvp} functions (@pxref{Executing a File}) +use this environment variable, as do many shells and other utilities +which are implemented in terms of those functions. + +The syntax of a path is a sequence of directory names separated by +colons. An empty string instead of a directory name stands for the +current directory (@pxref{Working Directory}). + +A typical value for this environment variable might be a string like: + +@smallexample +:/bin:/etc:/usr/bin:/usr/new/X11:/usr/new:/usr/local/bin +@end smallexample + +This means that if the user tries to execute a program named @code{foo}, +the system will look for files named @file{foo}, @file{/bin/foo}, +@file{/etc/foo}, and so on. 
The first of these files that exists is +the one that is executed. + +@c !!! also TERMCAP +@item TERM +@cindex @code{TERM} environment variable + +This specifies the kind of terminal that is receiving program output. +Some programs can make use of this information to take advantage of +special escape sequences or terminal modes supported by particular kinds +of terminals. Many programs which use the termcap library +(@pxref{Finding a Terminal Description,Find,,termcap,The Termcap Library +Manual}) use the @code{TERM} environment variable, for example. + +@item TZ +@cindex @code{TZ} environment variable + +This specifies the time zone. @xref{TZ Variable}, for information about +the format of this string and how it is used. + +@item LANG +@cindex @code{LANG} environment variable + +This specifies the default locale to use for attribute categories where +neither @code{LC_ALL} nor the specific environment variable for that +category is set. @xref{Locales}, for more information about +locales. + +@ignore +@c I doubt this really exists +@item LC_ALL +@cindex @code{LC_ALL} environment variable + +This is similar to the @code{LANG} environment variable. However, its +value takes precedence over any values provided for the individual +attribute category environment variables, or for the @code{LANG} +environment variable. +@end ignore + +@item LC_ALL +@cindex @code{LC_ALL} environment variable + +If this environment variable is set it overrides the selection for all +the locales done using the other @code{LC_*} environment variables. The +value of the other @code{LC_*} environment variables is simply ignored +in this case. + +@item LC_COLLATE +@cindex @code{LC_COLLATE} environment variable + +This specifies what locale to use for string sorting. + +@item LC_CTYPE +@cindex @code{LC_CTYPE} environment variable + +This specifies what locale to use for character sets and character +classification. 
+ +@item LC_MESSAGES +@cindex @code{LC_MESSAGES} environment variable + +This specifies what locale to use for printing messages and for parsing +responses. + +@item LC_MONETARY +@cindex @code{LC_MONETARY} environment variable + +This specifies what locale to use for formatting monetary values. + +@item LC_NUMERIC +@cindex @code{LC_NUMERIC} environment variable + +This specifies what locale to use for formatting numbers. + +@item LC_TIME +@cindex @code{LC_TIME} environment variable + +This specifies what locale to use for formatting date/time values. + +@item NLSPATH +@cindex @code{NLSPATH} environment variable + +This specifies the directories in which the @code{catopen} function +looks for message translation catalogs. + +@item _POSIX_OPTION_ORDER +@cindex @code{_POSIX_OPTION_ORDER} environment variable + +If this environment variable is defined, it suppresses the usual +reordering of command line arguments by @code{getopt} and +@code{argp_parse}. @xref{Argument Syntax}. + +@c !!! GNU also has COREFILE, CORESERVER, EXECSERVERS +@end table + +@node Auxiliary Vector +@section Auxiliary Vector +@cindex auxiliary vector + +When a program is executed, it receives information from the operating +system about the environment in which it is operating. The form of this +information is a table of key-value pairs, where the keys are from the +set of @samp{AT_} values in @file{elf.h}. Some of the data is provided +by the kernel for libc consumption, and may be obtained by ordinary +interfaces, such as @code{sysconf}. However, on a platform-by-platform +basis there may be information that is not available any other way. + +@subsection Definition of @code{getauxval} +@comment sys/auxv.h +@deftypefun {unsigned long int} getauxval (unsigned long int @var{type}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Reads from hwcap or iterates over constant auxv. +This function is used to inquire about the entries in the auxiliary +vector. 
The @var{type} argument should be one of the @samp{AT_} symbols +defined in @file{elf.h}. If a matching entry is found, the value is +returned; if the entry is not found, zero is returned and @code{errno} is +set to @code{ENOENT}. +@end deftypefun + +For some platforms, the key @code{AT_HWCAP} is the easiest way to inquire +about any instruction set extensions available at runtime. In this case, +there will (of necessity) be a platform-specific set of @samp{HWCAP_} +values masked together that describe the capabilities of the cpu on which +the program is being executed. + +@node System Calls +@section System Calls + +@cindex system call +A system call is a request for service that a program makes of the +kernel. The service is generally something that only the kernel has +the privilege to do, such as doing I/O. Programmers don't normally +need to be concerned with system calls because there are functions in +@theglibc{} to do virtually everything that system calls do. +These functions work by making system calls themselves. For example, +there is a system call that changes the permissions of a file, but +you don't need to know about it because you can just use @theglibc{}'s +@code{chmod} function. + +@cindex kernel call +System calls are sometimes called kernel calls. + +However, there are times when you want to make a system call explicitly, +and for that, @theglibc{} provides the @code{syscall} function. +@code{syscall} is harder to use and less portable than functions like +@code{chmod}, but easier and more portable than coding the system call +in assembler instructions. + +@code{syscall} is most useful when you are working with a system call +which is special to your system or is newer than @theglibc{} you +are using. @code{syscall} is implemented in an entirely generic way; +the function does not know anything about what a particular system +call does or even if it is valid. 
+ +The description of @code{syscall} in this section assumes a certain +protocol for system calls on the various platforms on which @theglibc{} +runs. That protocol is not defined by any strong authority, but +we won't describe it here either because anyone who is coding +@code{syscall} probably won't accept anything less than kernel and C +library source code as a specification of the interface between them +anyway. + + +@code{syscall} is declared in @file{unistd.h}. + +@comment unistd.h +@comment ??? +@deftypefun {long int} syscall (long int @var{sysno}, @dots{}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{syscall} performs a generic system call. + +@cindex system call number +@var{sysno} is the system call number. Each kind of system call is +identified by a number. Macros for all the possible system call numbers +are defined in @file{sys/syscall.h}. + +The remaining arguments are the arguments for the system call, in +order, and their meanings depend on the kind of system call. Each kind +of system call has a definite number of arguments, from zero to five. +If you code more arguments than the system call takes, the extra ones to +the right are ignored. + +The return value is the return value from the system call, unless the +system call failed. In that case, @code{syscall} returns @code{-1} and +sets @code{errno} to an error code that the system call returned. Note +that system calls do not return @code{-1} when they succeed. +@cindex errno + +If you specify an invalid @var{sysno}, @code{syscall} returns @code{-1} +with @code{errno} = @code{ENOSYS}. 
+ +Example: + +@smallexample + +#include <unistd.h> +#include <sys/syscall.h> +#include <errno.h> + +@dots{} + +int rc; + +rc = syscall(SYS_chmod, "/etc/passwd", 0444); + +if (rc == -1) + fprintf(stderr, "chmod failed, errno = %d\n", errno); + +@end smallexample + +This, if all the compatibility stars are aligned, is equivalent to the +following preferable code: + +@smallexample + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> + +@dots{} + +int rc; + +rc = chmod("/etc/passwd", 0444); +if (rc == -1) + fprintf(stderr, "chmod failed, errno = %d\n", errno); + +@end smallexample + +@end deftypefun + + +@node Program Termination +@section Program Termination +@cindex program termination +@cindex process termination + +@cindex exit status value +The usual way for a program to terminate is simply for its @code{main} +function to return. The @dfn{exit status value} returned from the +@code{main} function is used to report information back to the process's +parent process or shell. + +A program can also terminate normally by calling the @code{exit} +function. + +In addition, programs can be terminated by signals; this is discussed in +more detail in @ref{Signal Handling}. The @code{abort} function causes +a signal that kills the program. + +@menu +* Normal Termination:: If a program calls @code{exit}, a + process terminates normally. +* Exit Status:: The @code{exit status} provides information + about why the process terminated. +* Cleanups on Exit:: A process can run its own cleanup + functions upon normal termination. +* Aborting a Program:: The @code{abort} function causes + abnormal program termination. +* Termination Internals:: What happens when a process terminates. +@end menu + +@node Normal Termination +@subsection Normal Termination + +A process terminates normally when its program signals it is done by +calling @code{exit}. 
Returning from @code{main} is equivalent to +calling @code{exit}, and the value that @code{main} returns is used as +the argument to @code{exit}. + +@comment stdlib.h +@comment ISO +@deftypefun void exit (int @var{status}) +@safety{@prelim{}@mtunsafe{@mtasurace{:exit}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +@c Access to the atexit/on_exit list, the libc_atexit hook and tls dtors +@c is not guarded. Streams must be flushed, and that triggers the usual +@c AS and AC issues with streams. +The @code{exit} function tells the system that the program is done, which +causes it to terminate the process. + +@var{status} is the program's exit status, which becomes part of the +process' termination status. This function does not return. +@end deftypefun + +Normal termination causes the following actions: + +@enumerate +@item +Functions that were registered with the @code{atexit} or @code{on_exit} +functions are called in the reverse order of their registration. This +mechanism allows your application to specify its own ``cleanup'' actions +to be performed at program termination. Typically, this is used to do +things like saving program state information in a file, or unlocking +locks in shared data bases. + +@item +All open streams are closed, writing out any buffered output data. See +@ref{Closing Streams}. In addition, temporary files opened +with the @code{tmpfile} function are removed; see @ref{Temporary Files}. + +@item +@code{_exit} is called, terminating the program. @xref{Termination Internals}. +@end enumerate + +@node Exit Status +@subsection Exit Status +@cindex exit status + +When a program exits, it can return to the parent process a small +amount of information about the cause of termination, using the +@dfn{exit status}. This is a value between 0 and 255 that the exiting +process passes as an argument to @code{exit}. + +Normally you should use the exit status to report very broad information +about success or failure. 
You can't provide a lot of detail about the +reasons for the failure, and most parent processes would not want much +detail anyway. + +There are conventions for what sorts of status values certain programs +should return. The most common convention is simply 0 for success and 1 +for failure. Programs that perform comparison use a different +convention: they use status 1 to indicate a mismatch, and status 2 to +indicate an inability to compare. Your program should follow an +existing convention if an existing convention makes sense for it. + +A general convention reserves status values 128 and up for special +purposes. In particular, the value 128 is used to indicate failure to +execute another program in a subprocess. This convention is not +universally obeyed, but it is a good idea to follow it in your programs. + +@strong{Warning:} Don't try to use the number of errors as the exit +status. This is actually not very useful; a parent process would +generally not care how many errors occurred. Worse than that, it does +not work, because the status value is truncated to eight bits. +Thus, if the program tried to report 256 errors, the parent would +receive a report of 0 errors---that is, success. + +For the same reason, it does not work to use the value of @code{errno} +as the exit status---these can exceed 255. + +@strong{Portability note:} Some non-POSIX systems use different +conventions for exit status values. For greater portability, you can +use the macros @code{EXIT_SUCCESS} and @code{EXIT_FAILURE} for the +conventional status value for success and failure, respectively. They +are declared in the file @file{stdlib.h}. +@pindex stdlib.h + +@comment stdlib.h +@comment ISO +@deftypevr Macro int EXIT_SUCCESS +This macro can be used with the @code{exit} function to indicate +successful program completion. + +On POSIX systems, the value of this macro is @code{0}. On other +systems, the value might be some other (possibly non-constant) integer +expression. 
+@end deftypevr + +@comment stdlib.h +@comment ISO +@deftypevr Macro int EXIT_FAILURE +This macro can be used with the @code{exit} function to indicate +unsuccessful program completion in a general sense. + +On POSIX systems, the value of this macro is @code{1}. On other +systems, the value might be some other (possibly non-constant) integer +expression. Other nonzero status values also indicate failures. Certain +programs use different nonzero status values to indicate particular +kinds of ``non-success''. For example, @code{diff} uses status value +@code{1} to mean that the files are different, and @code{2} or more to +mean that there was difficulty in opening the files. +@end deftypevr + +Don't confuse a program's exit status with a process' termination status. +There are lots of ways a process can terminate besides having its program +finish. In the event that the process termination @emph{is} caused by program +termination (i.e., @code{exit}), though, the program's exit status becomes +part of the process' termination status. + +@node Cleanups on Exit +@subsection Cleanups on Exit + +Your program can arrange to run its own cleanup functions if normal +termination happens. If you are writing a library for use in various +application programs, then it is unreliable to insist that all +applications call the library's cleanup functions explicitly before +exiting. It is much more robust to make the cleanup invisible to the +application, by setting up a cleanup function in the library itself +using @code{atexit} or @code{on_exit}. 
+ +@comment stdlib.h +@comment ISO +@deftypefun int atexit (void (*@var{function}) (void)) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}} +@c atexit @ascuheap @asulock @aculock @acsmem +@c cxa_atexit @ascuheap @asulock @aculock @acsmem +@c __internal_atexit @ascuheap @asulock @aculock @acsmem +@c __new_exitfn @ascuheap @asulock @aculock @acsmem +@c __libc_lock_lock @asulock @aculock +@c calloc dup @ascuheap @acsmem +@c __libc_lock_unlock @aculock +@c atomic_write_barrier dup ok +The @code{atexit} function registers the function @var{function} to be +called at normal program termination. The @var{function} is called with +no arguments. + +The return value from @code{atexit} is zero on success and nonzero if +the function cannot be registered. +@end deftypefun + +@comment stdlib.h +@comment SunOS +@deftypefun int on_exit (void (*@var{function})(int @var{status}, void *@var{arg}), void *@var{arg}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}} +@c on_exit @ascuheap @asulock @aculock @acsmem +@c new_exitfn dup @ascuheap @asulock @aculock @acsmem +@c atomic_write_barrier dup ok +This function is a somewhat more powerful variant of @code{atexit}. It +accepts two arguments, a function @var{function} and an arbitrary +pointer @var{arg}. At normal program termination, the @var{function} is +called with two arguments: the @var{status} value passed to @code{exit}, +and the @var{arg}. + +This function is included in @theglibc{} only for compatibility +with SunOS, and may not be supported by other implementations. +@end deftypefun + +Here's a trivial program that illustrates the use of @code{exit} and +@code{atexit}: + +@smallexample +@include atexit.c.texi +@end smallexample + +@noindent +When this program is executed, it just prints the message and exits. 
+ +@node Aborting a Program +@subsection Aborting a Program +@cindex aborting a program + +You can abort your program using the @code{abort} function. The prototype +for this function is in @file{stdlib.h}. +@pindex stdlib.h + +@comment stdlib.h +@comment ISO +@deftypefun void abort (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +@c The implementation takes a recursive lock and attempts to support +@c calls from signal handlers, but if we're in the middle of flushing or +@c using streams, we may encounter them in inconsistent states. +The @code{abort} function causes abnormal program termination. This +does not execute cleanup functions registered with @code{atexit} or +@code{on_exit}. + +This function actually terminates the process by raising a +@code{SIGABRT} signal, and your program can include a handler to +intercept this signal; see @ref{Signal Handling}. +@end deftypefun + +@c Put in by rms. Don't remove. +@cartouche +@strong{Future Change Warning:} Proposed Federal censorship regulations +may prohibit us from giving you information about the possibility of +calling this function. We would be required to say that this is not an +acceptable way of terminating a program. +@end cartouche + +@node Termination Internals +@subsection Termination Internals + +The @code{_exit} function is the primitive used for process termination +by @code{exit}. It is declared in the header file @file{unistd.h}. +@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun void _exit (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall (exit_group or exit); calls __task_terminate on hurd, +@c and abort in the generic posix implementation. +The @code{_exit} function is the primitive for causing a process to +terminate with status @var{status}. Calling this function does not +execute cleanup functions registered with @code{atexit} or +@code{on_exit}. 
+@end deftypefun + +@comment stdlib.h +@comment ISO +@deftypefun void _Exit (int @var{status}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Alias for _exit. +The @code{_Exit} function is the @w{ISO C} equivalent to @code{_exit}. +The @w{ISO C} committee members were not sure whether the definitions of +@code{_exit} and @code{_Exit} were compatible so they have not used the +POSIX name. + +This function was introduced in @w{ISO C99} and is declared in +@file{stdlib.h}. +@end deftypefun + +When a process terminates for any reason---either because the program +terminates, or as a result of a signal---the +following things happen: + +@itemize @bullet +@item +All open file descriptors in the process are closed. @xref{Low-Level I/O}. +Note that streams are not flushed automatically when the process +terminates; see @ref{I/O on Streams}. + +@item +A process exit status is saved to be reported back to the parent process +via @code{wait} or @code{waitpid}; see @ref{Process Completion}. If the +program exited, this status includes as its low-order 8 bits the program +exit status. + + +@item +Any child processes of the process being terminated are assigned a new +parent process. (On most systems, including GNU, this is the @code{init} +process, with process ID 1.) + +@item +A @code{SIGCHLD} signal is sent to the parent process. + +@item +If the process is a session leader that has a controlling terminal, then +a @code{SIGHUP} signal is sent to each process in the foreground job, +and the controlling terminal is disassociated from that session. +@xref{Job Control}. + +@item +If termination of a process causes a process group to become orphaned, +and any member of that process group is stopped, then a @code{SIGHUP} +signal and a @code{SIGCONT} signal are sent to each process in the +group. @xref{Job Control}. 
+@end itemize diff --git a/REORG.TODO/manual/stdio-fp.c b/REORG.TODO/manual/stdio-fp.c new file mode 100644 index 0000000000..db9480782d --- /dev/null +++ b/REORG.TODO/manual/stdio-fp.c @@ -0,0 +1,28 @@ +/* This program is to generate one of the examples in stdio.texi. */ + +#include <stdio.h> + + +static void +print (double v) +{ + printf ("|%13.4a|%13.4f|%13.4e|%13.4g|\n", v, v, v, v); +} + + +int +main (void) +{ + print (0.0); + print (0.5); + print (1.0); + print (-1.0); + print (100.0); + print (1000.0); + print (10000.0); + print (12345.0); + print (100000.0); + print (123456.0); + + return 0; +} diff --git a/REORG.TODO/manual/stdio.texi b/REORG.TODO/manual/stdio.texi new file mode 100644 index 0000000000..29f3fed89b --- /dev/null +++ b/REORG.TODO/manual/stdio.texi @@ -0,0 +1,5663 @@ +@node I/O on Streams, Low-Level I/O, I/O Overview, Top +@c %MENU% High-level, portable I/O facilities +@chapter Input/Output on Streams +@c fix an overfull: +@tex +\hyphenation{which-ever} +@end tex + +This chapter describes the functions for creating streams and performing +input and output operations on them. As discussed in @ref{I/O +Overview}, a stream is a fairly abstract, high-level concept +representing a communications channel to a file, device, or process. + +@menu +* Streams:: About the data type representing a stream. +* Standard Streams:: Streams to the standard input and output + devices are created for you. +* Opening Streams:: How to create a stream to talk to a file. +* Closing Streams:: Close a stream when you are finished with it. +* Streams and Threads:: Issues with streams in threaded programs. +* Streams and I18N:: Streams in internationalized applications. +* Simple Output:: Unformatted output by characters and lines. +* Character Input:: Unformatted input by characters and words. +* Line Input:: Reading a line or a record from a stream. +* Unreading:: Peeking ahead/pushing back input just read. 
+* Block Input/Output:: Input and output operations on blocks of data. +* Formatted Output:: @code{printf} and related functions. +* Customizing Printf:: You can define new conversion specifiers for + @code{printf} and friends. +* Formatted Input:: @code{scanf} and related functions. +* EOF and Errors:: How you can tell if an I/O error happens. +* Error Recovery:: What you can do about errors. +* Binary Streams:: Some systems distinguish between text files + and binary files. +* File Positioning:: About random-access streams. +* Portable Positioning:: Random access on peculiar ISO C systems. +* Stream Buffering:: How to control buffering of streams. +* Other Kinds of Streams:: Streams that do not necessarily correspond + to an open file. +* Formatted Messages:: Print strictly formatted messages. +@end menu + +@node Streams +@section Streams + +For historical reasons, the type of the C data structure that represents +a stream is called @code{FILE} rather than ``stream''. Since most of +the library functions deal with objects of type @code{FILE *}, sometimes +the term @dfn{file pointer} is also used to mean ``stream''. This leads +to unfortunate confusion over terminology in many books on C. This +manual, however, is careful to use the terms ``file'' and ``stream'' +only in the technical sense. +@cindex file pointer + +@pindex stdio.h +The @code{FILE} type is declared in the header file @file{stdio.h}. + +@comment stdio.h +@comment ISO +@deftp {Data Type} FILE +This is the data type used to represent stream objects. A @code{FILE} +object holds all of the internal state information about the connection +to the associated file, including such things as the file position +indicator and buffering information. Each stream also has error and +end-of-file status indicators that can be tested with the @code{ferror} +and @code{feof} functions; see @ref{EOF and Errors}. 
+@end deftp + +@code{FILE} objects are allocated and managed internally by the +input/output library functions. Don't try to create your own objects of +type @code{FILE}; let the library do it. Your programs should +deal only with pointers to these objects (that is, @code{FILE *} values) +rather than the objects themselves. +@c !!! should say that FILE's have "No user-serviceable parts inside." + +@node Standard Streams +@section Standard Streams +@cindex standard streams +@cindex streams, standard + +When the @code{main} function of your program is invoked, it already has +three predefined streams open and available for use. These represent +the ``standard'' input and output channels that have been established +for the process. + +These streams are declared in the header file @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypevar {FILE *} stdin +The @dfn{standard input} stream, which is the normal source of input for the +program. +@end deftypevar +@cindex standard input stream + +@comment stdio.h +@comment ISO +@deftypevar {FILE *} stdout +The @dfn{standard output} stream, which is used for normal output from +the program. +@end deftypevar +@cindex standard output stream + +@comment stdio.h +@comment ISO +@deftypevar {FILE *} stderr +The @dfn{standard error} stream, which is used for error messages and +diagnostics issued by the program. +@end deftypevar +@cindex standard error stream + +On @gnusystems{}, you can specify what files or processes correspond to +these streams using the pipe and redirection facilities provided by the +shell. (The primitives shells use to implement these facilities are +described in @ref{File System Interface}.) Most other operating systems +provide similar mechanisms, but the details of how to use them can vary. + +In @theglibc{}, @code{stdin}, @code{stdout}, and @code{stderr} are +normal variables which you can set just like any others. 
For example, +to redirect the standard output to a file, you could do: + +@smallexample +fclose (stdout); +stdout = fopen ("standard-output-file", "w"); +@end smallexample + +Note however, that in other systems @code{stdin}, @code{stdout}, and +@code{stderr} are macros that you cannot assign to in the normal way. +But you can use @code{freopen} to get the effect of closing one and +reopening it. @xref{Opening Streams}. + +The three streams @code{stdin}, @code{stdout}, and @code{stderr} are not +unoriented at program start (@pxref{Streams and I18N}). + +@node Opening Streams +@section Opening Streams + +@cindex opening a stream +Opening a file with the @code{fopen} function creates a new stream and +establishes a connection between the stream and a file. This may +involve creating a new file. + +@pindex stdio.h +Everything described in this section is declared in the header file +@file{stdio.h}. + +@comment stdio.h +@comment ISO +@deftypefun {FILE *} fopen (const char *@var{filename}, const char *@var{opentype}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @acsfd{} @aculock{}}} +@c fopen may leak the list lock if cancelled within _IO_link_in. +The @code{fopen} function opens a stream for I/O to the file +@var{filename}, and returns a pointer to the stream. + +The @var{opentype} argument is a string that controls how the file is +opened and specifies attributes of the resulting stream. It must begin +with one of the following sequences of characters: + +@table @samp +@item r +Open an existing file for reading only. + +@item w +Open the file for writing only. If the file already exists, it is +truncated to zero length. Otherwise a new file is created. + +@item a +Open a file for append access; that is, writing at the end of file only. +If the file already exists, its initial contents are unchanged and +output to the stream is appended to the end of the file. +Otherwise, a new, empty file is created. 
+ +@item r+ +Open an existing file for both reading and writing. The initial contents +of the file are unchanged and the initial file position is at the +beginning of the file. + +@item w+ +Open a file for both reading and writing. If the file already exists, it +is truncated to zero length. Otherwise, a new file is created. + +@item a+ +Open or create file for both reading and appending. If the file exists, +its initial contents are unchanged. Otherwise, a new file is created. +The initial file position for reading is at the beginning of the file, +but output is always appended to the end of the file. +@end table + +As you can see, @samp{+} requests a stream that can do both input and +output. When using such a stream, you must call @code{fflush} +(@pxref{Stream Buffering}) or a file positioning function such as +@code{fseek} (@pxref{File Positioning}) when switching from reading +to writing or vice versa. Otherwise, internal buffers might not be +emptied properly. + +Additional characters may appear after these to specify flags for the +call. Always put the mode (@samp{r}, @samp{w+}, etc.) first; that is +the only part you are guaranteed will be understood by all systems. + +@Theglibc{} defines additional characters for use in @var{opentype}: + +@table @samp +@item c +The file is opened with cancellation in the I/O functions disabled. + +@item e +The underlying file descriptor will be closed if you use any of the +@code{exec@dots{}} functions (@pxref{Executing a File}). (This is +equivalent to having set @code{FD_CLOEXEC} on that descriptor. +@xref{Descriptor Flags}.) + +@item m +The file is opened and accessed using @code{mmap}. This is only +supported with files opened for reading. + +@item x +Insist on creating a new file---if a file @var{filename} already +exists, @code{fopen} fails rather than opening it. If you use +@samp{x} you are guaranteed that you will not clobber an existing +file. 
This is equivalent to the @code{O_EXCL} option to the +@code{open} function (@pxref{Opening and Closing Files}). + +The @samp{x} modifier is part of @w{ISO C11}. +@end table + +The character @samp{b} in @var{opentype} has a standard meaning; it +requests a binary stream rather than a text stream. But this makes no +difference in POSIX systems (including @gnusystems{}). If both +@samp{+} and @samp{b} are specified, they can appear in either order. +@xref{Binary Streams}. + +@cindex stream orientation +@cindex orientation, stream +If the @var{opentype} string contains the sequence +@code{,ccs=@var{STRING}} then @var{STRING} is taken as the name of a +coded character set and @code{fopen} will mark the stream as +wide-oriented with appropriate conversion functions in place to convert +from and to the character set @var{STRING}. Any other stream +is opened initially unoriented and the orientation is decided with the +first file operation. If the first operation is a wide character +operation, the stream is not only marked as wide-oriented, also the +conversion functions to convert to the coded character set used for the +current locale are loaded. This will not change anymore from this point +on even if the locale selected for the @code{LC_CTYPE} category is +changed. + +Any other characters in @var{opentype} are simply ignored. They may be +meaningful in other systems. + +If the open fails, @code{fopen} returns a null pointer. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is in fact @code{fopen64} since the LFS +interface replaces transparently the old interface. +@end deftypefun + +You can have multiple streams (or file descriptors) pointing to the same +file open at the same time. If you do only input, this works +straightforwardly, but you must be careful if any output streams are +included. @xref{Stream/Descriptor Precautions}. 
This is equally true +whether the streams are in one program (not usual) or in several +programs (which can easily happen). It may be advantageous to use the +file locking facilities to avoid simultaneous access. @xref{File +Locks}. + +@comment stdio.h +@comment Unix98 +@deftypefun {FILE *} fopen64 (const char *@var{filename}, const char *@var{opentype}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @acsfd{} @aculock{}}} +This function is similar to @code{fopen} but the stream it returns a +pointer for is opened using @code{open64}. Therefore this stream can be +used even on files larger than @twoexp{31} bytes on 32 bit machines. + +Please note that the return type is still @code{FILE *}. There is no +special @code{FILE} type for the LFS interface. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a 32 +bits machine this function is available under the name @code{fopen} +and so transparently replaces the old interface. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypevr Macro int FOPEN_MAX +The value of this macro is an integer constant expression that +represents the minimum number of streams that the implementation +guarantees can be open simultaneously. You might be able to open more +than this many streams, but that is not guaranteed. The value of this +constant is at least eight, which includes the three standard streams +@code{stdin}, @code{stdout}, and @code{stderr}. In POSIX.1 systems this +value is determined by the @code{OPEN_MAX} parameter; @pxref{General +Limits}. In BSD and GNU, it is controlled by the @code{RLIMIT_NOFILE} +resource limit; @pxref{Limits on Resources}. 
+@end deftypevr + +@comment stdio.h +@comment ISO +@deftypefun {FILE *} freopen (const char *@var{filename}, const char *@var{opentype}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @acsfd{}}} +@c Like most I/O operations, this one is guarded by a recursive lock, +@c released even upon cancellation, but cancellation may leak file +@c descriptors and leave the stream in an inconsistent state (e.g., +@c still bound to the closed descriptor). Also, if the stream is +@c part-way through a significant update (say running freopen) when a +@c signal handler calls freopen again on the same stream, the result is +@c likely to be an inconsistent stream, and the possibility of closing +@c twice file descriptor number that the stream used to use, the second +@c time when it might have already been reused by another thread. +This function is like a combination of @code{fclose} and @code{fopen}. +It first closes the stream referred to by @var{stream}, ignoring any +errors that are detected in the process. (Because errors are ignored, +you should not use @code{freopen} on an output stream if you have +actually done any output using the stream.) Then the file named by +@var{filename} is opened with mode @var{opentype} as for @code{fopen}, +and associated with the same stream object @var{stream}. + +If the operation fails, a null pointer is returned; otherwise, +@code{freopen} returns @var{stream}. On Linux, @code{freopen} may also +fail and set @code{errno} to @code{EBUSY} when the kernel structure for +the old file descriptor was not initialized completely before @code{freopen} +was called. This can only happen in multi-threaded programs, when two +threads race to allocate the same file descriptor number. 
To avoid the +possibility of this race, do not use @code{close} to close the underlying +file descriptor for a @code{FILE}; either use @code{freopen} while the +file is still open, or use @code{open} and then @code{dup2} to install +the new file descriptor. + +@code{freopen} has traditionally been used to connect a standard stream +such as @code{stdin} with a file of your own choice. This is useful in +programs in which use of a standard stream for certain purposes is +hard-coded. In @theglibc{}, you can simply close the standard +streams and open new ones with @code{fopen}. But other systems lack +this ability, so using @code{freopen} is more portable. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit machine this function is in fact @code{freopen64} since the LFS +interface replaces transparently the old interface. +@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun {FILE *} freopen64 (const char *@var{filename}, const char *@var{opentype}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @acsfd{}}} +This function is similar to @code{freopen}. The only difference is that +on 32 bit machine the stream returned is able to read beyond the +@twoexp{31} bytes limits imposed by the normal interface. It should be +noted that the stream pointed to by @var{stream} need not be opened +using @code{fopen64} or @code{freopen64} since its mode is not important +for this function. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a 32 +bits machine this function is available under the name @code{freopen} +and so transparently replaces the old interface. +@end deftypefun + +In some situations it is useful to know whether a given stream is +available for reading or writing. This information is normally not +available and would have to be remembered separately. 
Solaris +introduced a few functions to get this information from the stream +descriptor and these functions are also available in @theglibc{}. + +@comment stdio_ext.h +@comment GNU +@deftypefun int __freadable (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{__freadable} function determines whether the stream +@var{stream} was opened to allow reading. In this case the return value +is nonzero. For write-only streams the function returns zero. + +This function is declared in @file{stdio_ext.h}. +@end deftypefun + +@comment stdio_ext.h +@comment GNU +@deftypefun int __fwritable (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{__fwritable} function determines whether the stream +@var{stream} was opened to allow writing. In this case the return value +is nonzero. For read-only streams the function returns zero. + +This function is declared in @file{stdio_ext.h}. +@end deftypefun + +For slightly different kinds of problems there are two more functions. +They provide even finer-grained information. + +@comment stdio_ext.h +@comment GNU +@deftypefun int __freading (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{__freading} function determines whether the stream +@var{stream} was last read from or whether it is opened read-only. In +this case the return value is nonzero, otherwise it is zero. +Determining whether a stream opened for reading and writing was last +used for writing allows one to draw conclusions about the content of the +buffer, among other things. + +This function is declared in @file{stdio_ext.h}. +@end deftypefun + +@comment stdio_ext.h +@comment GNU +@deftypefun int __fwriting (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{__fwriting} function determines whether the stream +@var{stream} was last written to or whether it is opened write-only. In +this case the return value is nonzero, otherwise it is zero. 
+ +This function is declared in @file{stdio_ext.h}. +@end deftypefun + + +@node Closing Streams +@section Closing Streams + +@cindex closing a stream +When a stream is closed with @code{fclose}, the connection between the +stream and the file is canceled. After you have closed a stream, you +cannot perform any additional operations on it. + +@comment stdio.h +@comment ISO +@deftypefun int fclose (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c After fclose, it is undefined behavior to use the stream it points +@c to. Therefore, one must only call fclose when the stream is +@c otherwise unused. Concurrent uses started before will complete +@c successfully because of the lock, which makes it MT-Safe. Calling it +@c from a signal handler is perfectly safe if the stream is known to be +@c no longer used, which is a precondition for fclose to be safe in the +@c first place; since this is no further requirement, fclose is safe for +@c use in async signals too. After calling fclose, you can no longer +@c use the stream, not even to fclose it again, so its memory and file +@c descriptor may leak if fclose is canceled before @c releasing them. +@c That the stream must be unused and it becomes unused after the call +@c is what would enable fclose to be AS- and AC-Safe while freopen +@c isn't. However, because of the possibility of leaving __gconv_lock +@c taken upon cancellation, AC-Safety is lost. +This function causes @var{stream} to be closed and the connection to +the corresponding file to be broken. Any buffered output is written +and any buffered input is discarded. The @code{fclose} function returns +a value of @code{0} if the file was closed successfully, and @code{EOF} +if an error was detected. + +It is important to check for errors when you call @code{fclose} to close +an output stream, because real, everyday errors can be detected at this +time. 
For example, when @code{fclose} writes the remaining buffered +output, it might get an error because the disk is full. Even if you +know the buffer is empty, errors can still occur when closing a file if +you are using NFS. + +The function @code{fclose} is declared in @file{stdio.h}. +@end deftypefun + +To close all streams currently available @theglibc{} provides +another function. + +@comment stdio.h +@comment GNU +@deftypefun int fcloseall (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:streams}}@asunsafe{}@acsafe{}} +@c Like fclose, using any previously-opened streams after fcloseall is +@c undefined. However, the implementation of fcloseall isn't equivalent +@c to calling fclose for all streams: it just flushes and unbuffers all +@c streams, without any locking. It's the flushing without locking that +@c makes it unsafe. +This function causes all open streams of the process to be closed and +the connections to corresponding files to be broken. All buffered data +is written and any buffered input is discarded. The @code{fcloseall} +function returns a value of @code{0} if all the files were closed +successfully, and @code{EOF} if an error was detected. + +This function should be used only in special situations, e.g., when an +error occurred and the program must be aborted. Normally each single +stream should be closed separately so that problems with individual +streams can be identified. It is also problematic since the standard +streams (@pxref{Standard Streams}) will also be closed. + +The function @code{fcloseall} is declared in @file{stdio.h}. +@end deftypefun + +If the @code{main} function to your program returns, or if you call the +@code{exit} function (@pxref{Normal Termination}), all open streams are +automatically closed properly. If your program terminates in any other +manner, such as by calling the @code{abort} function (@pxref{Aborting a +Program}) or from a fatal signal (@pxref{Signal Handling}), open streams +might not be closed properly. 
Buffered output might not be flushed and +files may be incomplete. For more information on buffering of streams, +see @ref{Stream Buffering}. + +@node Streams and Threads +@section Streams and Threads + +@cindex threads +@cindex multi-threaded application +Streams can be used in multi-threaded applications in the same way they +are used in single-threaded applications. But the programmer must be +aware of the possible complications. It is important to know about +these even if the program one writes never uses threads since the design +and implementation of many stream functions are heavily influenced by the +requirements added by multi-threaded programming. + +The POSIX standard requires that by default the stream operations are +atomic. I.e., issuing two stream operations for the same stream in two +threads at the same time will cause the operations to be executed as if +they were issued sequentially. The buffer operations performed while +reading or writing are protected from other uses of the same stream. To +do this each stream has an internal lock object which has to be +(implicitly) acquired before any work can be done. + +But there are situations where this is not enough and there are also +situations where this is not wanted. The implicit locking is not enough +if the program requires more than one stream function call to happen +atomically. One example would be if an output line a program wants to +generate is created by several function calls. The functions by +themselves would ensure only atomicity of their own operation, but not +atomicity over all the function calls. For this it is necessary to +perform the stream locking in the application code. + +@comment stdio.h +@comment POSIX +@deftypefun void flockfile (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +@c There's no way to tell whether the lock was acquired before or after +@c cancellation so as to unlock only when appropriate. 
+The @code{flockfile} function acquires the internal locking object +associated with the stream @var{stream}. This ensures that no other +thread can explicitly through @code{flockfile}/@code{ftrylockfile} or +implicitly through the call of a stream function lock the stream. The +thread will block until the lock is acquired. An explicit call to +@code{funlockfile} has to be used to release the lock. +@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int ftrylockfile (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +The @code{ftrylockfile} function tries to acquire the internal locking +object associated with the stream @var{stream} just like +@code{flockfile}. But unlike @code{flockfile} this function does not +block if the lock is not available. @code{ftrylockfile} returns zero if +the lock was successfully acquired. Otherwise it returns a nonzero value and the stream is locked by +another thread. +@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun void funlockfile (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +The @code{funlockfile} function releases the internal locking object of +the stream @var{stream}. The stream must have been locked before by a +call to @code{flockfile} or a successful call of @code{ftrylockfile}. +The implicit locking performed by the stream operations does not count. +The @code{funlockfile} function does not return an error status and the +behavior of a call for a stream which is not locked by the current +thread is undefined. 
+@end deftypefun + +The following example shows how the functions above can be used to +generate an output line atomically even in multi-threaded applications +(yes, the same job could be done with one @code{fprintf} call but it is +sometimes not possible): + +@smallexample +FILE *fp; +@{ + @dots{} + flockfile (fp); + fputs ("This is test number ", fp); + fprintf (fp, "%d\n", test); + funlockfile (fp); +@} +@end smallexample + +Without the explicit locking it would be possible for another thread to +use the stream @var{fp} after the @code{fputs} call returns and before +@code{fprintf} was called with the result that the number does not +follow the word @samp{number}. + +From this description it might already be clear that the locking objects +in streams are no simple mutexes. Since locking the same stream twice +in the same thread is allowed the locking objects must be equivalent to +recursive mutexes. These mutexes keep track of the owner and the number +of times the lock is acquired. The same number of @code{funlockfile} +calls by the same thread is necessary to unlock the stream completely. +For instance: + +@smallexample +void +foo (FILE *fp) +@{ + ftrylockfile (fp); + fputs ("in foo\n", fp); + /* @r{This is very wrong!!!} */ + funlockfile (fp); +@} +@end smallexample + +It is important here that the @code{funlockfile} function is only called +if the @code{ftrylockfile} function succeeded in locking the stream. It +is therefore always wrong to ignore the result of @code{ftrylockfile}. +And it makes no sense since otherwise one would use @code{flockfile}. +The result of code like that above is that either @code{funlockfile} +tries to free a stream that hasn't been locked by the current thread or it +frees the stream prematurely. 
The code should look like this: + +@smallexample +void +foo (FILE *fp) +@{ + if (ftrylockfile (fp) == 0) + @{ + fputs ("in foo\n", fp); + funlockfile (fp); + @} +@} +@end smallexample + +Now that we covered why it is necessary to have locking it is +necessary to talk about situations when locking is unwanted and what can +be done. The locking operations (explicit or implicit) don't come for +free. Even if a lock is not taken the cost is not zero. The operations +which have to be performed require memory operations that are safe in +multi-processor environments. With the many local caches involved in +such systems this is quite costly. So it is best to avoid the locking +completely if it is not needed -- because the code in question is never +used in a context where two or more threads may use a stream at a time. +This can be determined most of the time for application code; for +library code which can be used in many contexts one should default to be +conservative and use locking. + +There are two basic mechanisms to avoid locking. The first is to use +the @code{_unlocked} variants of the stream operations. The POSIX +standard defines quite a few of those and @theglibc{} adds a few +more. These variants of the functions behave just like the functions +with the name without the suffix except that they do not lock the +stream. Using these functions is very desirable since they are +potentially much faster. This is not only because the locking +operation itself is avoided. More importantly, functions like +@code{putc} and @code{getc} are very simple and traditionally (before the +introduction of threads) were implemented as macros which are very fast +if the buffer is not empty. With the addition of locking requirements +these functions are no longer implemented as macros since they would +expand to too much code. +But these macros are still available with the same functionality under the new +names @code{putc_unlocked} and @code{getc_unlocked}. 
This possibly huge +difference of speed also suggests the use of the @code{_unlocked} +functions even if locking is required. The difference is that the +locking then has to be performed in the program: + +@smallexample +void +foo (FILE *fp, char *buf) +@{ + flockfile (fp); + while (*buf != '/') + putc_unlocked (*buf++, fp); + funlockfile (fp); +@} +@end smallexample + +If in this example the @code{putc} function would be used and the +explicit locking would be missing the @code{putc} function would have to +acquire the lock in every call, potentially many times depending on when +the loop terminates. Writing it the way illustrated above allows the +@code{putc_unlocked} macro to be used which means no locking and direct +manipulation of the buffer of the stream. + +A second way to avoid locking is by using a non-standard function which +was introduced in Solaris and is available in @theglibc{} as well. + +@comment stdio_ext.h +@comment GNU +@deftypefun int __fsetlocking (FILE *@var{stream}, int @var{type}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asulock{}}@acsafe{}} +@c Changing the implicit-locking status of a stream while it's in use by +@c another thread may cause a lock to be implicitly acquired and not +@c released, or vice-versa. This function should probably hold the lock +@c while changing this setting, to make sure we don't change it while +@c there are any concurrent uses. Meanwhile, callers should acquire the +@c lock themselves to be safe, and even concurrent uses with external +@c locking will be fine, as long as functions that require external +@c locking are not called without holding locks. + +The @code{__fsetlocking} function can be used to select whether the +stream operations will implicitly acquire the locking object of the +stream @var{stream}. By default this is done but it can be disabled and +reinstated using this function. There are three values defined for the +@var{type} parameter. 
+ +@vtable @code +@item FSETLOCKING_INTERNAL +The stream @var{stream} will from now on use the default internal +locking. Every stream operation with the exception of the @code{_unlocked} +variants will implicitly lock the stream. + +@item FSETLOCKING_BYCALLER +After the @code{__fsetlocking} function returns, the user is responsible +for locking the stream. None of the stream operations will implicitly +do this anymore until the state is set back to +@code{FSETLOCKING_INTERNAL}. + +@item FSETLOCKING_QUERY +@code{__fsetlocking} only queries the current locking state of the +stream. The return value will be @code{FSETLOCKING_INTERNAL} or +@code{FSETLOCKING_BYCALLER} depending on the state. +@end vtable + +The return value of @code{__fsetlocking} is either +@code{FSETLOCKING_INTERNAL} or @code{FSETLOCKING_BYCALLER} depending on +the state of the stream before the call. + +This function and the values for the @var{type} parameter are declared +in @file{stdio_ext.h}. +@end deftypefun + +This function is especially useful when program code has to be used +which is written without knowledge about the @code{_unlocked} functions +(or if the programmer was too lazy to use them). + +@node Streams and I18N +@section Streams in Internationalized Applications + +@w{ISO C90} introduced the new type @code{wchar_t} to allow handling +larger character sets. What was missing was a possibility to output +strings of @code{wchar_t} directly. One had to convert them into +multibyte strings using @code{wcstombs} (there was no @code{wcsrtombs} +yet) and then use the normal stream functions. While this is doable it +is very cumbersome since performing the conversions is not trivial and +greatly increases program complexity and size. + +The Unix standard early on (I think in XPG4.2) introduced two additional +format specifiers for the @code{printf} and @code{scanf} families of +functions. 
Printing and reading of single wide characters was made +possible using the @code{%C} specifier and wide character strings can be +handled with @code{%S}. These modifiers behave just like @code{%c} and +@code{%s} only that they expect the corresponding argument to have the +wide character type and that the wide character and string are +transformed into/from multibyte strings before being used. + +This was a beginning but it is still not good enough. Not always is it +desirable to use @code{printf} and @code{scanf}. The other, smaller and +faster functions cannot handle wide characters. Second, it is not +possible to have a format string for @code{printf} and @code{scanf} +consisting of wide characters. The result is that format strings would +have to be generated if they have to contain non-basic characters. + +@cindex C++ streams +@cindex streams, C++ +In the @w{Amendment 1} to @w{ISO C90} a whole new set of functions was +added to solve the problem. Most of the stream functions got a +counterpart which take a wide character or wide character string instead +of a character or string respectively. The new functions operate on the +same streams (like @code{stdout}). This is different from the model of +the C++ runtime library where separate streams for wide and normal I/O +are used. + +@cindex orientation, stream +@cindex stream orientation +Being able to use the same stream for wide and normal operations comes +with a restriction: a stream can be used either for wide operations or +for normal operations. Once it is decided there is no way back. Only a +call to @code{freopen} or @code{freopen64} can reset the +@dfn{orientation}. The orientation can be decided in three ways: + +@itemize @bullet +@item +If any of the normal character functions are used (this includes the +@code{fread} and @code{fwrite} functions) the stream is marked as not +wide oriented. + +@item +If any of the wide character functions are used the stream is marked as +wide oriented. 
+ +@item +The @code{fwide} function can be used to set the orientation either way. +@end itemize + +It is important to never mix the use of wide and not wide operations on +a stream. There are no diagnostics issued. The application behavior +will simply be strange or the application will simply crash. The +@code{fwide} function can help avoid this. + +@comment wchar.h +@comment ISO +@deftypefun int fwide (FILE *@var{stream}, int @var{mode}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{}}} +@c Querying is always safe, but changing the stream when it's in use +@c upthread may be problematic. Like most lock-acquiring functions, +@c this one may leak the lock if canceled. + +The @code{fwide} function can be used to set and query the state of the +orientation of the stream @var{stream}. If the @var{mode} parameter has +a positive value the stream becomes wide oriented, for negative values +narrow oriented. It is not possible to overwrite previous orientations +with @code{fwide}. I.e., if the stream @var{stream} was already +oriented before the call nothing is done. + +If @var{mode} is zero the current orientation state is queried and +nothing is changed. + +The @code{fwide} function returns a negative value, zero, or a positive +value if the stream is narrow, not at all, or wide oriented +respectively. + +This function was introduced in @w{Amendment 1} to @w{ISO C90} and is +declared in @file{wchar.h}. +@end deftypefun + +It is generally a good idea to orient a stream as early as possible. +This can prevent surprise especially for the standard streams +@code{stdin}, @code{stdout}, and @code{stderr}. If some library +function in some situations uses one of these streams and this use +orients the stream in a different way than the rest of the application +expects one might end up with hard to reproduce errors. Remember +that no errors are signaled if the streams are used incorrectly. 
Leaving +a stream unoriented after creation is normally only necessary for +library functions which create streams which can be used in different +contexts. + +When writing code which uses streams and which can be used in different +contexts it is important to query the orientation of the stream before +using it (unless the rules of the library interface demand a specific +orientation). The following little, silly function illustrates this. + +@smallexample +void +print_f (FILE *fp) +@{ + if (fwide (fp, 0) > 0) + /* @r{Positive return value means wide orientation.} */ + fputwc (L'f', fp); + else + fputc ('f', fp); +@} +@end smallexample + +Note that in this case the function @code{print_f} decides about the +orientation of the stream if it was unoriented before (will not happen +if the advice above is followed). + +The encoding used for the @code{wchar_t} values is unspecified and the +user must not make any assumptions about it. For I/O of @code{wchar_t} +values this means that it is impossible to write these values directly +to the stream. This is not what follows from the @w{ISO C} locale model +either. What happens instead is that the bytes read from or written to +the underlying media are first converted into the internal encoding +chosen by the implementation for @code{wchar_t}. The external encoding +is determined by the @code{LC_CTYPE} category of the current locale or +by the @samp{ccs} part of the mode specification given to @code{fopen}, +@code{fopen64}, @code{freopen}, or @code{freopen64}. How and when the +conversion happens is unspecified and it happens invisibly to the user. + +Since a stream is created in the unoriented state it has at that point +no conversion associated with it. The conversion which will be used is +determined by the @code{LC_CTYPE} category selected at the time the +stream is oriented. If the locales are changed at the runtime this +might produce surprising results unless one pays attention. 
This is +just another good reason to orient the stream explicitly as soon as +possible, perhaps with a call to @code{fwide}. + +@node Simple Output +@section Simple Output by Characters or Lines + +@cindex writing to a stream, by characters +This section describes functions for performing character- and +line-oriented output. + +These narrow stream functions are declared in the header file +@file{stdio.h} and the wide stream functions in @file{wchar.h}. +@pindex stdio.h +@pindex wchar.h + +@comment stdio.h +@comment ISO +@deftypefun int fputc (int @var{c}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +@c If the stream is in use when interrupted by a signal, the recursive +@c lock won't help ensure the stream is consistent; indeed, if fputc +@c gets a signal precisely before the post-incremented _IO_write_ptr +@c value is stored, we may overwrite the interrupted write. Conversely, +@c depending on compiler optimizations, the incremented _IO_write_ptr +@c may be stored before the character is stored in the buffer, +@c corrupting the stream if async cancel hits between the two stores. +@c There may be other reasons for AS- and AC-unsafety in the overflow +@c cases. +The @code{fputc} function converts the character @var{c} to type +@code{unsigned char}, and writes it to the stream @var{stream}. +@code{EOF} is returned if a write error occurs; otherwise the +character @var{c} is returned. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t fputwc (wchar_t @var{wc}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +The @code{fputwc} function writes the wide character @var{wc} to the +stream @var{stream}. @code{WEOF} is returned if a write error occurs; +otherwise the character @var{wc} is returned. 
+@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int fputc_unlocked (int @var{c}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +@c The unlocked functions can't possibly satisfy the MT-Safety +@c requirements on their own, because they require external locking for +@c safety. +The @code{fputc_unlocked} function is equivalent to the @code{fputc} +function except that it does not implicitly lock the stream. +@end deftypefun + +@comment wchar.h +@comment POSIX +@deftypefun wint_t fputwc_unlocked (wchar_t @var{wc}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fputwc_unlocked} function is equivalent to the @code{fputwc} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int putc (int @var{c}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +This is just like @code{fputc}, except that most systems implement it as +a macro, making it faster. One consequence is that it may evaluate the +@var{stream} argument more than once, which is an exception to the +general rule for macros. @code{putc} is usually the best function to +use for writing a single character. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t putwc (wchar_t @var{wc}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +This is just like @code{fputwc}, except that it can be implemented as +a macro, making it faster. One consequence is that it may evaluate the +@var{stream} argument more than once, which is an exception to the +general rule for macros. @code{putwc} is usually the best function to +use for writing a single wide character. 
+@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int putc_unlocked (int @var{c}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{putc_unlocked} function is equivalent to the @code{putc} +function except that it does not implicitly lock the stream. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun wint_t putwc_unlocked (wchar_t @var{wc}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{putwc_unlocked} function is equivalent to the @code{putwc} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int putchar (int @var{c}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +The @code{putchar} function is equivalent to @code{putc} with +@code{stdout} as the value of the @var{stream} argument. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t putwchar (wchar_t @var{wc}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +The @code{putwchar} function is equivalent to @code{putwc} with +@code{stdout} as the value of the @var{stream} argument. +@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int putchar_unlocked (int @var{c}) +@safety{@prelim{}@mtunsafe{@mtasurace{:stdout}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{putchar_unlocked} function is equivalent to the @code{putchar} +function except that it does not implicitly lock the stream. 
+@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun wint_t putwchar_unlocked (wchar_t @var{wc}) +@safety{@prelim{}@mtunsafe{@mtasurace{:stdout}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{putwchar_unlocked} function is equivalent to the @code{putwchar} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int fputs (const char *@var{s}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +The function @code{fputs} writes the string @var{s} to the stream +@var{stream}. The terminating null character is not written. +This function does @emph{not} add a newline character, either. +It outputs only the characters in the string. + +This function returns @code{EOF} if a write error occurs, and otherwise +a non-negative value. + +For example: + +@smallexample +fputs ("Are ", stdout); +fputs ("you ", stdout); +fputs ("hungry?\n", stdout); +@end smallexample + +@noindent +outputs the text @samp{Are you hungry?} followed by a newline. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int fputws (const wchar_t *@var{ws}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +The function @code{fputws} writes the wide character string @var{ws} to +the stream @var{stream}. The terminating null character is not written. +This function does @emph{not} add a newline character, either. It +outputs only the characters in the string. + +This function returns @code{EOF} if a write error occurs, and otherwise +a non-negative value. 
+@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int fputs_unlocked (const char *@var{s}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fputs_unlocked} function is equivalent to the @code{fputs} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun int fputws_unlocked (const wchar_t *@var{ws}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fputws_unlocked} function is equivalent to the @code{fputws} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int puts (const char *@var{s}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{puts} function writes the string @var{s} to the stream +@code{stdout} followed by a newline. The terminating null character of +the string is not written. (Note that @code{fputs} does @emph{not} +write a newline as this function does.) + +@code{puts} is the most convenient function for printing simple +messages. For example: + +@smallexample +puts ("This is a message."); +@end smallexample + +@noindent +outputs the text @samp{This is a message.} followed by a newline. +@end deftypefun + +@comment stdio.h +@comment SVID +@deftypefun int putw (int @var{w}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function writes the word @var{w} (that is, an @code{int}) to +@var{stream}. It is provided for compatibility with SVID, but we +recommend you use @code{fwrite} instead (@pxref{Block Input/Output}). 
+@end deftypefun + +@node Character Input +@section Character Input + +@cindex reading from a stream, by characters +This section describes functions for performing character-oriented +input. These narrow stream functions are declared in the header file +@file{stdio.h} and the wide character functions are declared in +@file{wchar.h}. +@pindex stdio.h +@pindex wchar.h + +These functions return an @code{int} or @code{wint_t} value (for narrow +and wide stream functions respectively) that is either a character of +input, or the special value @code{EOF}/@code{WEOF} (usually -1). For +the narrow stream functions it is important to store the result of these +functions in a variable of type @code{int} instead of @code{char}, even +when you plan to use it only as a character. Storing @code{EOF} in a +@code{char} variable truncates its value to the size of a character, so +that it is no longer distinguishable from the valid character +@samp{(char) -1}. So always use an @code{int} for the result of +@code{getc} and friends, and check for @code{EOF} after the call; once +you've verified that the result is not @code{EOF}, you can be sure that +it will fit in a @samp{char} variable without loss of information. + +@comment stdio.h +@comment ISO +@deftypefun int fgetc (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +@c Same caveats as fputc, but instead of losing a write in case of async +@c signals, we may read the same character more than once, and the +@c stream may be left in odd states due to cancellation in the underflow +@c cases. +This function reads the next character as an @code{unsigned char} from +the stream @var{stream} and returns its value, converted to an +@code{int}. If an end-of-file condition or read error occurs, +@code{EOF} is returned instead. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t fgetwc (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function reads the next wide character from the stream @var{stream} +and returns its value. If an end-of-file condition or read error +occurs, @code{WEOF} is returned instead. +@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int fgetc_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fgetc_unlocked} function is equivalent to the @code{fgetc} +function except that it does not implicitly lock the stream. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun wint_t fgetwc_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fgetwc_unlocked} function is equivalent to the @code{fgetwc} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int getc (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This is just like @code{fgetc}, except that it is permissible (and +typical) for it to be implemented as a macro that evaluates the +@var{stream} argument more than once. @code{getc} is often highly +optimized, so it is usually the best function to use to read a single +character. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t getwc (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This is just like @code{fgetwc}, except that it is permissible for it to +be implemented as a macro that evaluates the @var{stream} argument more +than once. @code{getwc} can be highly optimized, so it is usually the +best function to use to read a single wide character. 
+@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int getc_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{getc_unlocked} function is equivalent to the @code{getc} +function except that it does not implicitly lock the stream. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun wint_t getwc_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{getwc_unlocked} function is equivalent to the @code{getwc} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int getchar (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{getchar} function is equivalent to @code{getc} with @code{stdin} +as the value of the @var{stream} argument. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t getwchar (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{getwchar} function is equivalent to @code{getwc} with @code{stdin} +as the value of the @var{stream} argument. +@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int getchar_unlocked (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:stdin}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{getchar_unlocked} function is equivalent to the @code{getchar} +function except that it does not implicitly lock the stream. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun wint_t getwchar_unlocked (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:stdin}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{getwchar_unlocked} function is equivalent to the @code{getwchar} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. 
+@end deftypefun + +Here is an example of a function that does input using @code{fgetc}. It +would work just as well using @code{getc} instead, or using +@code{getchar ()} instead of @w{@code{fgetc (stdin)}}. The code would +also work the same for the wide character stream functions. + +@smallexample +int +y_or_n_p (const char *question) +@{ + fputs (question, stdout); + while (1) + @{ + int c, answer; + /* @r{Write a space to separate answer from question.} */ + fputc (' ', stdout); + /* @r{Read the first character of the line.} + @r{This should be the answer character, but might not be.} */ + c = tolower (fgetc (stdin)); + answer = c; + /* @r{Discard rest of input line.} */ + while (c != '\n' && c != EOF) + c = fgetc (stdin); + /* @r{Obey the answer if it was valid.} */ + if (answer == 'y') + return 1; + if (answer == 'n') + return 0; + /* @r{Answer was invalid: ask for valid answer.} */ + fputs ("Please answer y or n:", stdout); + @} +@} +@end smallexample + +@comment stdio.h +@comment SVID +@deftypefun int getw (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function reads a word (that is, an @code{int}) from @var{stream}. +It's provided for compatibility with SVID. We recommend you use +@code{fread} instead (@pxref{Block Input/Output}). Unlike @code{getc}, +any @code{int} value could be a valid result. @code{getw} returns +@code{EOF} when it encounters end-of-file or an error, but there is no +way to distinguish this from an input word with value -1. +@end deftypefun + +@node Line Input +@section Line-Oriented Input + +Since many programs interpret input on the basis of lines, it is +convenient to have functions to read a line of text from a stream. + +Standard C has functions to do this, but they aren't very safe: null +characters and even (for @code{gets}) long lines can confuse them. So +@theglibc{} provides the nonstandard @code{getline} function that +makes it easy to read lines reliably. 
+ +Another GNU extension, @code{getdelim}, generalizes @code{getline}. It +reads a delimited record, defined as everything through the next +occurrence of a specified delimiter character. + +All these functions are declared in @file{stdio.h}. + +@comment stdio.h +@comment GNU +@deftypefun ssize_t getline (char **@var{lineptr}, size_t *@var{n}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{}}} +@c Besides the usual possibility of getting an inconsistent stream in a +@c signal handler or leaving it inconsistent in case of cancellation, +@c the possibility of leaving a dangling pointer upon cancellation +@c between reallocing the buffer at *lineptr and updating the pointer +@c brings about another case of @acucorrupt. +This function reads an entire line from @var{stream}, storing the text +(including the newline and a terminating null character) in a buffer +and storing the buffer address in @code{*@var{lineptr}}. + +Before calling @code{getline}, you should place in @code{*@var{lineptr}} +the address of a buffer @code{*@var{n}} bytes long, allocated with +@code{malloc}. If this buffer is long enough to hold the line, +@code{getline} stores the line in this buffer. Otherwise, +@code{getline} makes the buffer bigger using @code{realloc}, storing the +new buffer address back in @code{*@var{lineptr}} and the increased size +back in @code{*@var{n}}. +@xref{Unconstrained Allocation}. + +If you set @code{*@var{lineptr}} to a null pointer, and @code{*@var{n}} +to zero, before the call, then @code{getline} allocates the initial +buffer for you by calling @code{malloc}. This buffer remains allocated +even if @code{getline} encounters errors and is unable to read any bytes. + +In either case, when @code{getline} returns, @code{*@var{lineptr}} is +a @code{char *} which points to the text of the line. 
+ +When @code{getline} is successful, it returns the number of characters +read (including the newline, but not including the terminating null). +This value enables you to distinguish null characters that are part of +the line from the null character inserted as a terminator. + +This function is a GNU extension, but it is the recommended way to read +lines from a stream. The alternative standard functions are unreliable. + +If an error occurs or end of file is reached without any bytes read, +@code{getline} returns @code{-1}. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun ssize_t getdelim (char **@var{lineptr}, size_t *@var{n}, int @var{delimiter}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{}}} +@c See the getline @acucorrupt note. +This function is like @code{getline} except that the character which +tells it to stop reading is not necessarily newline. The argument +@var{delimiter} specifies the delimiter character; @code{getdelim} keeps +reading until it sees that character (or end of file). + +The text is stored in @var{lineptr}, including the delimiter character +and a terminating null. Like @code{getline}, @code{getdelim} makes +@var{lineptr} bigger if it isn't big enough. + +@code{getline} is in fact implemented in terms of @code{getdelim}, just +like this: + +@smallexample +ssize_t +getline (char **lineptr, size_t *n, FILE *stream) +@{ + return getdelim (lineptr, n, '\n', stream); +@} +@end smallexample +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun {char *} fgets (char *@var{s}, int @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{fgets} function reads characters from the stream @var{stream} +up to and including a newline character and stores them in the string +@var{s}, adding a null character to mark the end of the string. 
You +must supply @var{count} characters worth of space in @var{s}, but the +number of characters read is at most @var{count} @minus{} 1. The extra +character space is used to hold the null character at the end of the +string. + +If the system is already at end of file when you call @code{fgets}, then +the contents of the array @var{s} are unchanged and a null pointer is +returned. A null pointer is also returned if a read error occurs. +Otherwise, the return value is the pointer @var{s}. + +@strong{Warning:} If the input data has a null character, you can't tell. +So don't use @code{fgets} unless you know the data cannot contain a null. +Don't use it to read files edited by the user because, if the user inserts +a null character, you should either handle it properly or print a clear +error message. We recommend using @code{getline} instead of @code{fgets}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} fgetws (wchar_t *@var{ws}, int @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{fgetws} function reads wide characters from the stream +@var{stream} up to and including a newline character and stores them in +the string @var{ws}, adding a null wide character to mark the end of the +string. You must supply @var{count} wide characters worth of space in +@var{ws}, but the number of characters read is at most @var{count} +@minus{} 1. The extra character space is used to hold the null wide +character at the end of the string. + +If the system is already at end of file when you call @code{fgetws}, then +the contents of the array @var{ws} are unchanged and a null pointer is +returned. A null pointer is also returned if a read error occurs. +Otherwise, the return value is the pointer @var{ws}. + +@strong{Warning:} If the input data has a null wide character (which are +null bytes in the input stream), you can't tell. 
So don't use +@code{fgetws} unless you know the data cannot contain a null. Don't use +it to read files edited by the user because, if the user inserts a null +character, you should either handle it properly or print a clear error +message. +@comment XXX We need getwline!!! +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun {char *} fgets_unlocked (char *@var{s}, int @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fgets_unlocked} function is equivalent to the @code{fgets} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {wchar_t *} fgetws_unlocked (wchar_t *@var{ws}, int @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fgetws_unlocked} function is equivalent to the @code{fgetws} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefn {Deprecated function} {char *} gets (char *@var{s}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The function @code{gets} reads characters from the stream @code{stdin} +up to the next newline character, and stores them in the string @var{s}. +The newline character is discarded (note that this differs from the +behavior of @code{fgets}, which copies the newline character into the +string). If @code{gets} encounters a read error or end-of-file, it +returns a null pointer; otherwise it returns @var{s}. + +@strong{Warning:} The @code{gets} function is @strong{very dangerous} +because it provides no protection against overflowing the string +@var{s}. @Theglibc{} includes it for compatibility only. You +should @strong{always} use @code{fgets} or @code{getline} instead. 
To +remind you of this, the linker (if using GNU @code{ld}) will issue a +warning whenever you use @code{gets}. +@end deftypefn + +@node Unreading +@section Unreading +@cindex peeking at input +@cindex unreading characters +@cindex pushing input back + +In parser programs it is often useful to examine the next character in +the input stream without removing it from the stream. This is called +``peeking ahead'' at the input because your program gets a glimpse of +the input it will read next. + +Using stream I/O, you can peek ahead at input by first reading it and +then @dfn{unreading} it (also called @dfn{pushing it back} on the stream). +Unreading a character makes it available to be input again from the stream, +by the next call to @code{fgetc} or other input function on that stream. + +@menu +* Unreading Idea:: An explanation of unreading with pictures. +* How Unread:: How to call @code{ungetc} to do unreading. +@end menu + +@node Unreading Idea +@subsection What Unreading Means + +Here is a pictorial explanation of unreading. Suppose you have a +stream reading a file that contains just six characters, the letters +@samp{foobar}. Suppose you have read three characters so far. The +situation looks like this: + +@smallexample +f o o b a r + ^ +@end smallexample + +@noindent +so the next input character will be @samp{b}. + +@c @group Invalid outside @example +If instead of reading @samp{b} you unread the letter @samp{o}, you get a +situation like this: + +@smallexample +f o o b a r + | + o-- + ^ +@end smallexample + +@noindent +so that the next input characters will be @samp{o} and @samp{b}. +@c @end group + +@c @group +If you unread @samp{9} instead of @samp{o}, you get this situation: + +@smallexample +f o o b a r + | + 9-- + ^ +@end smallexample + +@noindent +so that the next input characters will be @samp{9} and @samp{b}. 
+@c @end group + +@node How Unread +@subsection Using @code{ungetc} To Do Unreading + +The function to unread a character is called @code{ungetc}, because it +reverses the action of @code{getc}. + +@comment stdio.h +@comment ISO +@deftypefun int ungetc (int @var{c}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{ungetc} function pushes back the character @var{c} onto the +input stream @var{stream}. So the next input from @var{stream} will +read @var{c} before anything else. + +If @var{c} is @code{EOF}, @code{ungetc} does nothing and just returns +@code{EOF}. This lets you call @code{ungetc} with the return value of +@code{getc} without needing to check for an error from @code{getc}. + +The character that you push back doesn't have to be the same as the last +character that was actually read from the stream. In fact, it isn't +necessary to actually read any characters from the stream before +unreading them with @code{ungetc}! But that is a strange way to write a +program; usually @code{ungetc} is used only to unread a character that +was just read from the same stream. @Theglibc{} supports this +even on files opened in binary mode, but other systems might not. + +@Theglibc{} only supports one character of pushback---in other +words, it does not work to call @code{ungetc} twice without doing input +in between. Other systems might let you push back multiple characters; +then reading from the stream retrieves the characters in the reverse +order that they were pushed. + +Pushing back characters doesn't alter the file; only the internal +buffering for the stream is affected. If a file positioning function +(such as @code{fseek}, @code{fseeko} or @code{rewind}; @pxref{File +Positioning}) is called, any pending pushed-back characters are +discarded. 
+ +Unreading a character on a stream that is at end of file clears the +end-of-file indicator for the stream, because it makes the character of +input available. After you read that character, trying to read again +will encounter end of file. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun wint_t ungetwc (wint_t @var{wc}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{ungetwc} function behaves just like @code{ungetc} except that it +pushes back a wide character. +@end deftypefun + +Here is an example showing the use of @code{getc} and @code{ungetc} to +skip over whitespace characters. When this function reaches a +non-whitespace character, it unreads that character to be seen again on +the next read operation on the stream. + +@smallexample +#include <stdio.h> +#include <ctype.h> + +void +skip_whitespace (FILE *stream) +@{ + int c; + do + /* @r{No need to check for @code{EOF} because it is not} + @r{@code{isspace}, and @code{ungetc} ignores @code{EOF}.} */ + c = getc (stream); + while (isspace (c)); + ungetc (c, stream); +@} +@end smallexample + +@node Block Input/Output +@section Block Input/Output + +This section describes how to do input and output operations on blocks +of data. You can use these functions to read and write binary data, as +well as to read and write text in fixed-size blocks instead of by +characters or lines. +@cindex binary I/O to a stream +@cindex block I/O to a stream +@cindex reading from a stream, by blocks +@cindex writing to a stream, by blocks + +Binary files are typically used to read and write blocks of data in the +same format as is used to represent the data in a running program. In +other words, arbitrary blocks of memory---not just character or string +objects---can be written to a binary file, and meaningfully read in +again by the same program. 
+ +Storing data in binary form is often considerably more efficient than +using the formatted I/O functions. Also, for floating-point numbers, +the binary form avoids possible loss of precision in the conversion +process. On the other hand, binary files can't be examined or modified +easily using many standard file utilities (such as text editors), and +are not portable between different implementations of the language, or +different kinds of computers. + +These functions are declared in @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun size_t fread (void *@var{data}, size_t @var{size}, size_t @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function reads up to @var{count} objects of size @var{size} into +the array @var{data}, from the stream @var{stream}. It returns the +number of objects actually read, which might be less than @var{count} if +a read error occurs or the end of the file is reached. This function +returns a value of zero (and doesn't read anything) if either @var{size} +or @var{count} is zero. + +If @code{fread} encounters end of file in the middle of an object, it +returns the number of complete objects read, and discards the partial +object. Therefore, the stream remains at the actual end of the file. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun size_t fread_unlocked (void *@var{data}, size_t @var{size}, size_t @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fread_unlocked} function is equivalent to the @code{fread} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. 
+@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun size_t fwrite (const void *@var{data}, size_t @var{size}, size_t @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function writes up to @var{count} objects of size @var{size} from +the array @var{data}, to the stream @var{stream}. The return value is +normally @var{count}, if the call succeeds. Any other value indicates +some sort of error, such as running out of space. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun size_t fwrite_unlocked (const void *@var{data}, size_t @var{size}, size_t @var{count}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fwrite_unlocked} function is equivalent to the @code{fwrite} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +@node Formatted Output +@section Formatted Output + +@cindex format string, for @code{printf} +@cindex template, for @code{printf} +@cindex formatted output to a stream +@cindex writing to a stream, formatted +The functions described in this section (@code{printf} and related +functions) provide a convenient way to perform formatted output. You +call @code{printf} with a @dfn{format string} or @dfn{template string} +that specifies how to format the values of the remaining arguments. + +Unless your program is a filter that specifically performs line- or +character-oriented processing, using @code{printf} or one of the other +related functions described in this section is usually the easiest and +most concise way to perform output. These functions are especially +useful for printing error messages, tables of data, and the like. + +@menu +* Formatted Output Basics:: Some examples to get you started. +* Output Conversion Syntax:: General syntax of conversion + specifications. 
+* Table of Output Conversions:: Summary of output conversions and + what they do. +* Integer Conversions:: Details about formatting of integers. +* Floating-Point Conversions:: Details about formatting of + floating-point numbers. +* Other Output Conversions:: Details about formatting of strings, + characters, pointers, and the like. +* Formatted Output Functions:: Descriptions of the actual functions. +* Dynamic Output:: Functions that allocate memory for the output. +* Variable Arguments Output:: @code{vprintf} and friends. +* Parsing a Template String:: What kinds of args does a given template + call for? +* Example of Parsing:: Sample program using @code{parse_printf_format}. +@end menu + +@node Formatted Output Basics +@subsection Formatted Output Basics + +The @code{printf} function can be used to print any number of arguments. +The template string argument you supply in a call provides +information not only about the number of additional arguments, but also +about their types and what style should be used for printing them. + +Ordinary characters in the template string are simply written to the +output stream as-is, while @dfn{conversion specifications} introduced by +a @samp{%} character in the template cause subsequent arguments to be +formatted and written to the output stream. For example, +@cindex conversion specifications (@code{printf}) + +@smallexample +int pct = 37; +char filename[] = "foo.txt"; +printf ("Processing of `%s' is %d%% finished.\nPlease be patient.\n", + filename, pct); +@end smallexample + +@noindent +produces output like + +@smallexample +Processing of `foo.txt' is 37% finished. +Please be patient. +@end smallexample + +This example shows the use of the @samp{%d} conversion to specify that +an @code{int} argument should be printed in decimal notation, the +@samp{%s} conversion to specify printing of a string argument, and +the @samp{%%} conversion to print a literal @samp{%} character. 
+ +There are also conversions for printing an integer argument as an +unsigned value in octal, decimal, or hexadecimal radix (@samp{%o}, +@samp{%u}, or @samp{%x}, respectively); or as a character value +(@samp{%c}). + +Floating-point numbers can be printed in normal, fixed-point notation +using the @samp{%f} conversion or in exponential notation using the +@samp{%e} conversion. The @samp{%g} conversion uses either @samp{%e} +or @samp{%f} format, depending on what is more appropriate for the +magnitude of the particular number. + +You can control formatting more precisely by writing @dfn{modifiers} +between the @samp{%} and the character that indicates which conversion +to apply. These slightly alter the ordinary behavior of the conversion. +For example, most conversion specifications permit you to specify a +minimum field width and a flag indicating whether you want the result +left- or right-justified within the field. + +The specific flags and modifiers that are permitted and their +interpretation vary depending on the particular conversion. They're all +described in more detail in the following sections. Don't worry if this +all seems excessively complicated at first; you can almost always get +reasonable free-format output without using any of the modifiers at all. +The modifiers are mostly used to make the output look ``prettier'' in +tables. + +@node Output Conversion Syntax +@subsection Output Conversion Syntax + +This section provides details about the precise syntax of conversion +specifications that can appear in a @code{printf} template +string. + +Characters in the template string that are not part of a conversion +specification are printed as-is to the output stream. Multibyte +character sequences (@pxref{Character Set Handling}) are permitted in a +template string. + +The conversion specifications in a @code{printf} template string have +the general form: + +@smallexample +% @r{[} @var{param-no} @r{$]} @var{flags} @var{width} @r{[} . 
@var{precision} @r{]} @var{type} @var{conversion} +@end smallexample + +@noindent +or + +@smallexample +% @r{[} @var{param-no} @r{$]} @var{flags} @var{width} . @r{*} @r{[} @var{param-no} @r{$]} @var{type} @var{conversion} +@end smallexample + +For example, in the conversion specifier @samp{%-10.8ld}, the @samp{-} +is a flag, @samp{10} specifies the field width, the precision is +@samp{8}, the letter @samp{l} is a type modifier, and @samp{d} specifies +the conversion style. (This particular type specifier says to +print a @code{long int} argument in decimal notation, with a minimum of +8 digits left-justified in a field at least 10 characters wide.) + +In more detail, output conversion specifications consist of an +initial @samp{%} character followed in sequence by: + +@itemize @bullet +@item +An optional specification of the parameter used for this format. +Normally the parameters to the @code{printf} function are assigned to the +formats in the order of appearance in the format string. But in some +situations (such as message translation) this is not desirable and this +extension allows an explicit parameter to be specified. + +The @var{param-no} parts of the format must be integers in the range of +1 to the maximum number of arguments present to the function call. Some +implementations limit this number to a certain upper bound. The exact +limit can be retrieved by the following constant. + +@defvr Macro NL_ARGMAX +The value of @code{NL_ARGMAX} is the maximum value allowed for the +specification of a positional parameter in a @code{printf} call. The +actual value in effect at runtime can be retrieved by using +@code{sysconf} with the @code{_SC_NL_ARGMAX} parameter (@pxref{Sysconf +Definition}). + +Some systems have a quite low limit such as @math{9} for @w{System V} +systems. @Theglibc{} has no real limit. +@end defvr + +If any of the formats has a specification for the parameter position all +of them in the format string shall have one.
Otherwise the behavior is +undefined. + +@item +Zero or more @dfn{flag characters} that modify the normal behavior of +the conversion specification. +@cindex flag character (@code{printf}) + +@item +An optional decimal integer specifying the @dfn{minimum field width}. +If the normal conversion produces fewer characters than this, the field +is padded with spaces to the specified width. This is a @emph{minimum} +value; if the normal conversion produces more characters than this, the +field is @emph{not} truncated. Normally, the output is right-justified +within the field. +@cindex minimum field width (@code{printf}) + +You can also specify a field width of @samp{*}. This means that the +next argument in the argument list (before the actual value to be +printed) is used as the field width. The value must be an @code{int}. +If the value is negative, this means to set the @samp{-} flag (see +below) and to use the absolute value as the field width. + +@item +An optional @dfn{precision} to specify the number of digits to be +written for the numeric conversions. If the precision is specified, it +consists of a period (@samp{.}) followed optionally by a decimal integer +(which defaults to zero if omitted). +@cindex precision (@code{printf}) + +You can also specify a precision of @samp{*}. This means that the next +argument in the argument list (before the actual value to be printed) is +used as the precision. The value must be an @code{int}, and is ignored +if it is negative. If you specify @samp{*} for both the field width and +precision, the field width argument precedes the precision argument. +Other C library versions may not recognize this syntax. + +@item +An optional @dfn{type modifier character}, which is used to specify the +data type of the corresponding argument if it differs from the default +type. (For example, the integer conversions assume a type of @code{int}, +but you can specify @samp{h}, @samp{l}, or @samp{L} for other integer +types.) 
+@cindex type modifier character (@code{printf}) + +@item +A character that specifies the conversion to be applied. +@end itemize + +The exact options that are permitted and how they are interpreted vary +between the different conversion specifiers. See the descriptions of the +individual conversions for information about the particular options that +they use. + +With the @samp{-Wformat} option, the GNU C compiler checks calls to +@code{printf} and related functions. It examines the format string and +verifies that the correct number and types of arguments are supplied. +There is also a GNU C syntax to tell the compiler that a function you +write uses a @code{printf}-style format string. +@xref{Function Attributes, , Declaring Attributes of Functions, +gcc.info, Using GNU CC}, for more information. + +@node Table of Output Conversions +@subsection Table of Output Conversions +@cindex output conversions, for @code{printf} + +Here is a table summarizing what all the different conversions do: + +@table @asis +@item @samp{%d}, @samp{%i} +Print an integer as a signed decimal number. @xref{Integer +Conversions}, for details. @samp{%d} and @samp{%i} are synonymous for +output, but are different when used with @code{scanf} for input +(@pxref{Table of Input Conversions}). + +@item @samp{%o} +Print an integer as an unsigned octal number. @xref{Integer +Conversions}, for details. + +@item @samp{%u} +Print an integer as an unsigned decimal number. @xref{Integer +Conversions}, for details. + +@item @samp{%x}, @samp{%X} +Print an integer as an unsigned hexadecimal number. @samp{%x} uses +lower-case letters and @samp{%X} uses upper-case. @xref{Integer +Conversions}, for details. + +@item @samp{%f} +Print a floating-point number in normal (fixed-point) notation. +@xref{Floating-Point Conversions}, for details. + +@item @samp{%e}, @samp{%E} +Print a floating-point number in exponential notation. @samp{%e} uses +lower-case letters and @samp{%E} uses upper-case. 
@xref{Floating-Point +Conversions}, for details. + +@item @samp{%g}, @samp{%G} +Print a floating-point number in either normal or exponential notation, +whichever is more appropriate for its magnitude. @samp{%g} uses +lower-case letters and @samp{%G} uses upper-case. @xref{Floating-Point +Conversions}, for details. + +@item @samp{%a}, @samp{%A} +Print a floating-point number in a hexadecimal fractional notation with +the exponent to base 2 represented in decimal digits. @samp{%a} uses +lower-case letters and @samp{%A} uses upper-case. @xref{Floating-Point +Conversions}, for details. + +@item @samp{%c} +Print a single character. @xref{Other Output Conversions}. + +@item @samp{%C} +This is an alias for @samp{%lc} which is supported for compatibility +with the Unix standard. + +@item @samp{%s} +Print a string. @xref{Other Output Conversions}. + +@item @samp{%S} +This is an alias for @samp{%ls} which is supported for compatibility +with the Unix standard. + +@item @samp{%p} +Print the value of a pointer. @xref{Other Output Conversions}. + +@item @samp{%n} +Get the number of characters printed so far. @xref{Other Output Conversions}. +Note that this conversion specification never produces any output. + +@item @samp{%m} +Print the string corresponding to the value of @code{errno}. +(This is a GNU extension.) +@xref{Other Output Conversions}. + +@item @samp{%%} +Print a literal @samp{%} character. @xref{Other Output Conversions}. +@end table + +If the syntax of a conversion specification is invalid, unpredictable +things will happen, so don't do this. If there aren't enough function +arguments provided to supply values for all the conversion +specifications in the template string, or if the arguments are not of +the correct types, the results are unpredictable. If you supply more +arguments than conversion specifications, the extra argument values are +simply ignored; this is sometimes useful. 
+ +@node Integer Conversions +@subsection Integer Conversions + +This section describes the options for the @samp{%d}, @samp{%i}, +@samp{%o}, @samp{%u}, @samp{%x}, and @samp{%X} conversion +specifications. These conversions print integers in various formats. + +The @samp{%d} and @samp{%i} conversion specifications both print an +@code{int} argument as a signed decimal number; while @samp{%o}, +@samp{%u}, and @samp{%x} print the argument as an unsigned octal, +decimal, or hexadecimal number (respectively). The @samp{%X} conversion +specification is just like @samp{%x} except that it uses the characters +@samp{ABCDEF} as digits instead of @samp{abcdef}. + +The following flags are meaningful: + +@table @asis +@item @samp{-} +Left-justify the result in the field (instead of the normal +right-justification). + +@item @samp{+} +For the signed @samp{%d} and @samp{%i} conversions, print a +plus sign if the value is positive. + +@item @samp{ } +For the signed @samp{%d} and @samp{%i} conversions, if the result +doesn't start with a plus or minus sign, prefix it with a space +character instead. Since the @samp{+} flag ensures that the result +includes a sign, this flag is ignored if you supply both of them. + +@item @samp{#} +For the @samp{%o} conversion, this forces the leading digit to be +@samp{0}, as if by increasing the precision. For @samp{%x} or +@samp{%X}, this prefixes a leading @samp{0x} or @samp{0X} (respectively) +to the result. This doesn't do anything useful for the @samp{%d}, +@samp{%i}, or @samp{%u} conversions. Using this flag produces output +which can be parsed by the @code{strtoul} function (@pxref{Parsing of +Integers}) and @code{scanf} with the @samp{%i} conversion +(@pxref{Numeric Input Conversions}). + +@item @samp{'} +Separate the digits into groups as specified by the locale specified for +the @code{LC_NUMERIC} category; @pxref{General Numeric}. This flag is a +GNU extension. + +@item @samp{0} +Pad the field with zeros instead of spaces. 
The zeros are placed after +any indication of sign or base. This flag is ignored if the @samp{-} +flag is also specified, or if a precision is specified. +@end table + +If a precision is supplied, it specifies the minimum number of digits to +appear; leading zeros are produced if necessary. If you don't specify a +precision, the number is printed with as many digits as it needs. If +you convert a value of zero with an explicit precision of zero, then no +characters at all are produced. + +Without a type modifier, the corresponding argument is treated as an +@code{int} (for the signed conversions @samp{%i} and @samp{%d}) or +@code{unsigned int} (for the unsigned conversions @samp{%o}, @samp{%u}, +@samp{%x}, and @samp{%X}). Recall that since @code{printf} and friends +are variadic, any @code{char} and @code{short} arguments are +automatically converted to @code{int} by the default argument +promotions. For arguments of other integer types, you can use these +modifiers: + +@table @samp +@item hh +Specifies that the argument is a @code{signed char} or @code{unsigned +char}, as appropriate. A @code{char} argument is converted to an +@code{int} or @code{unsigned int} by the default argument promotions +anyway, but the @samp{hh} modifier says to convert it back to a +@code{char} again. + +This modifier was introduced in @w{ISO C99}. + +@item h +Specifies that the argument is a @code{short int} or @code{unsigned +short int}, as appropriate. A @code{short} argument is converted to an +@code{int} or @code{unsigned int} by the default argument promotions +anyway, but the @samp{h} modifier says to convert it back to a +@code{short} again. + +@item j +Specifies that the argument is a @code{intmax_t} or @code{uintmax_t}, as +appropriate. + +This modifier was introduced in @w{ISO C99}. + +@item l +Specifies that the argument is a @code{long int} or @code{unsigned long +int}, as appropriate. Two @samp{l} characters are like the @samp{L} +modifier, below. 
+ +If used with @samp{%c} or @samp{%s} the corresponding parameter is +considered as a wide character or wide character string respectively. +This use of @samp{l} was introduced in @w{Amendment 1} to @w{ISO C90}. + +@item L +@itemx ll +@itemx q +Specifies that the argument is a @code{long long int}. (This type is +an extension supported by the GNU C compiler. On systems that don't +support extra-long integers, this is the same as @code{long int}.) + +The @samp{q} modifier is another name for the same thing, which comes +from 4.4 BSD; a @w{@code{long long int}} is sometimes called a ``quad'' +@code{int}. + +@item t +Specifies that the argument is a @code{ptrdiff_t}. + +This modifier was introduced in @w{ISO C99}. + +@item z +@itemx Z +Specifies that the argument is a @code{size_t}. + +@samp{z} was introduced in @w{ISO C99}. @samp{Z} is a GNU extension +predating this addition and should not be used in new code. +@end table + +Here is an example. Using the template string: + +@smallexample +"|%5d|%-5d|%+5d|%+-5d|% 5d|%05d|%5.0d|%5.2d|%d|\n" +@end smallexample + +@noindent +to print numbers using the different options for the @samp{%d} +conversion gives results like: + +@smallexample +| 0|0 | +0|+0 | 0|00000| | 00|0| +| 1|1 | +1|+1 | 1|00001| 1| 01|1| +| -1|-1 | -1|-1 | -1|-0001| -1| -01|-1| +|100000|100000|+100000|+100000| 100000|100000|100000|100000|100000| +@end smallexample + +In particular, notice what happens in the last case where the number +is too large to fit in the minimum field width specified. 
+ +Here are some more examples showing how unsigned integers print under +various format options, using the template string: + +@smallexample +"|%5u|%5o|%5x|%5X|%#5o|%#5x|%#5X|%#10.8x|\n" +@end smallexample + +@smallexample +| 0| 0| 0| 0| 0| 0| 0| 00000000| +| 1| 1| 1| 1| 01| 0x1| 0X1|0x00000001| +|100000|303240|186a0|186A0|0303240|0x186a0|0X186A0|0x000186a0| +@end smallexample + + +@node Floating-Point Conversions +@subsection Floating-Point Conversions + +This section discusses the conversion specifications for floating-point +numbers: the @samp{%f}, @samp{%e}, @samp{%E}, @samp{%g}, and @samp{%G} +conversions. + +The @samp{%f} conversion prints its argument in fixed-point notation, +producing output of the form +@w{[@code{-}]@var{ddd}@code{.}@var{ddd}}, +where the number of digits following the decimal point is controlled +by the precision you specify. + +The @samp{%e} conversion prints its argument in exponential notation, +producing output of the form +@w{[@code{-}]@var{d}@code{.}@var{ddd}@code{e}[@code{+}|@code{-}]@var{dd}}. +Again, the number of digits following the decimal point is controlled by +the precision. The exponent always contains at least two digits. The +@samp{%E} conversion is similar but the exponent is marked with the letter +@samp{E} instead of @samp{e}. + +The @samp{%g} and @samp{%G} conversions print the argument in the style +of @samp{%e} or @samp{%E} (respectively) if the exponent would be less +than -4 or greater than or equal to the precision; otherwise they use +the @samp{%f} style. A precision of @code{0}, is taken as 1. +Trailing zeros are removed from the fractional portion of the result and +a decimal-point character appears only if it is followed by a digit. + +The @samp{%a} and @samp{%A} conversions are meant for representing +floating-point numbers exactly in textual form so that they can be +exchanged as texts between different programs and/or machines. 
The +numbers are represented in the form +@w{[@code{-}]@code{0x}@var{h}@code{.}@var{hhh}@code{p}[@code{+}|@code{-}]@var{dd}}. +At the left of the decimal-point character exactly one digit is printed. +This character is only @code{0} if the number is denormalized. +Otherwise the value is unspecified; it is implementation dependent how many +bits are used. The number of hexadecimal digits on the right side of +the decimal-point character is equal to the precision. If the precision +is omitted it is determined to be large enough to provide an exact +representation of the number (or it is large enough to distinguish two +adjacent values if the @code{FLT_RADIX} is not a power of 2, +@pxref{Floating Point Parameters}). For the @samp{%a} conversion +lower-case characters are used to represent the hexadecimal number and +the prefix and exponent sign are printed as @code{0x} and @code{p} +respectively. Otherwise upper-case characters are used and @code{0X} +and @code{P} are used for the representation of prefix and exponent +string. The exponent to the base of two is printed as a decimal number +using at least one digit but at most as many digits as necessary to +represent the value exactly. + +If the value to be printed represents infinity or a NaN, the output is +@w{[@code{-}]@code{inf}} or @code{nan} respectively if the conversion +specifier is @samp{%a}, @samp{%e}, @samp{%f}, or @samp{%g} and it is +@w{[@code{-}]@code{INF}} or @code{NAN} respectively if the conversion is +@samp{%A}, @samp{%E}, or @samp{%G}. + +The following flags can be used to modify the behavior: + +@comment We use @asis instead of @samp so we can have ` ' as an item. +@table @asis +@item @samp{-} +Left-justify the result in the field. Normally the result is +right-justified. + +@item @samp{+} +Always include a plus or minus sign in the result. + +@item @samp{ } +If the result doesn't start with a plus or minus sign, prefix it with a +space instead.
Since the @samp{+} flag ensures that the result includes +a sign, this flag is ignored if you supply both of them. + +@item @samp{#} +Specifies that the result should always include a decimal point, even +if no digits follow it. For the @samp{%g} and @samp{%G} conversions, +this also forces trailing zeros after the decimal point to be left +in place where they would otherwise be removed. + +@item @samp{'} +Separate the digits of the integer part of the result into groups as +specified by the locale specified for the @code{LC_NUMERIC} category; +@pxref{General Numeric}. This flag is a GNU extension. + +@item @samp{0} +Pad the field with zeros instead of spaces; the zeros are placed +after any sign. This flag is ignored if the @samp{-} flag is also +specified. +@end table + +The precision specifies how many digits follow the decimal-point +character for the @samp{%f}, @samp{%e}, and @samp{%E} conversions. For +these conversions, the default precision is @code{6}. If the precision +is explicitly @code{0}, this suppresses the decimal point character +entirely. For the @samp{%g} and @samp{%G} conversions, the precision +specifies how many significant digits to print. Significant digits are +the first digit before the decimal point, and all the digits after it. +If the precision is @code{0} for @samp{%g} or @samp{%G}, it is treated +like a value of @code{1}; if it is not specified, it defaults to +@code{6}. If the value being printed +cannot be expressed accurately in the specified number of digits, the +value is rounded to the nearest number that fits. + +Without a type modifier, the floating-point conversions use an argument +of type @code{double}. (By the default argument promotions, any +@code{float} arguments are automatically converted to @code{double}.) +The following type modifier is supported: + +@table @samp +@item L +An uppercase @samp{L} specifies that the argument is a @code{long +double}.
+@end table + +Here are some examples showing how numbers print using the various +floating-point conversions. All of the numbers were printed using +this template string: + +@smallexample +"|%13.4a|%13.4f|%13.4e|%13.4g|\n" +@end smallexample + +Here is the output: + +@smallexample +| 0x0.0000p+0| 0.0000| 0.0000e+00| 0| +| 0x1.0000p-1| 0.5000| 5.0000e-01| 0.5| +| 0x1.0000p+0| 1.0000| 1.0000e+00| 1| +| -0x1.0000p+0| -1.0000| -1.0000e+00| -1| +| 0x1.9000p+6| 100.0000| 1.0000e+02| 100| +| 0x1.f400p+9| 1000.0000| 1.0000e+03| 1000| +| 0x1.3880p+13| 10000.0000| 1.0000e+04| 1e+04| +| 0x1.81c8p+13| 12345.0000| 1.2345e+04| 1.234e+04| +| 0x1.86a0p+16| 100000.0000| 1.0000e+05| 1e+05| +| 0x1.e240p+16| 123456.0000| 1.2346e+05| 1.235e+05| +@end smallexample + +Notice how the @samp{%g} conversion drops trailing zeros. + +@node Other Output Conversions +@subsection Other Output Conversions + +This section describes miscellaneous conversions for @code{printf}. + +The @samp{%c} conversion prints a single character. In case there is no +@samp{l} modifier the @code{int} argument is first converted to an +@code{unsigned char}. Then, if used in a wide stream function, the +character is converted into the corresponding wide character. The +@samp{-} flag can be used to specify left-justification in the field, +but no other flags are defined, and no precision or type modifier can be +given. For example: + +@smallexample +printf ("%c%c%c%c%c", 'h', 'e', 'l', 'l', 'o'); +@end smallexample + +@noindent +prints @samp{hello}. + +If there is an @samp{l} modifier present the argument is expected to be +of type @code{wint_t}. If used in a multibyte function the wide +character is converted into a multibyte character before being added to +the output. In this case more than one output byte can be produced. + +The @samp{%s} conversion prints a string. If no @samp{l} modifier is +present the corresponding argument must be of type @code{char *} (or +@code{const char *}). 
If used in a wide stream function the string is +first converted to a wide character string. A precision can be +specified to indicate the maximum number of characters to write; +otherwise characters in the string up to but not including the +terminating null character are written to the output stream. The +@samp{-} flag can be used to specify left-justification in the field, +but no other flags or type modifiers are defined for this conversion. +For example: + +@smallexample +printf ("%3s%-6s", "no", "where"); +@end smallexample + +@noindent +prints @samp{ nowhere }. + +If there is an @samp{l} modifier present, the argument is expected to +be of type @code{wchar_t *} (or @code{const wchar_t *}). + +If you accidentally pass a null pointer as the argument for a @samp{%s} +conversion, @theglibc{} prints it as @samp{(null)}. We think this +is more useful than crashing. But it's not good practice to pass a null +argument intentionally. + +The @samp{%m} conversion prints the string corresponding to the error +code in @code{errno}. @xref{Error Messages}. Thus: + +@smallexample +fprintf (stderr, "can't open `%s': %m\n", filename); +@end smallexample + +@noindent +is equivalent to: + +@smallexample +fprintf (stderr, "can't open `%s': %s\n", filename, strerror (errno)); +@end smallexample + +@noindent +The @samp{%m} conversion is a @glibcadj{} extension. + +The @samp{%p} conversion prints a pointer value. The corresponding +argument must be of type @code{void *}. In practice, you can use any +type of pointer. + +In @theglibc{}, non-null pointers are printed as unsigned integers, +as if a @samp{%#x} conversion were used. Null pointers print as +@samp{(nil)}. (Pointers might print differently in other systems.) + +For example: + +@smallexample +printf ("%p", "testing"); +@end smallexample + +@noindent +prints @samp{0x} followed by a hexadecimal number---the address of the +string constant @code{"testing"}. It does not print the word +@samp{testing}.
+ +You can supply the @samp{-} flag with the @samp{%p} conversion to +specify left-justification, but no other flags, precision, or type +modifiers are defined. + +The @samp{%n} conversion is unlike any of the other output conversions. +It uses an argument which must be a pointer to an @code{int}, but +instead of printing anything it stores the number of characters printed +so far by this call at that location. The @samp{h} and @samp{l} type +modifiers are permitted to specify that the argument is of type +@code{short int *} or @code{long int *} instead of @code{int *}, but no +flags, field width, or precision are permitted. + +For example, + +@smallexample +int nchar; +printf ("%d %s%n\n", 3, "bears", &nchar); +@end smallexample + +@noindent +prints: + +@smallexample +3 bears +@end smallexample + +@noindent +and sets @code{nchar} to @code{7}, because @samp{3 bears} is seven +characters. + + +The @samp{%%} conversion prints a literal @samp{%} character. This +conversion doesn't use an argument, and no flags, field width, +precision, or type modifiers are permitted. + + +@node Formatted Output Functions +@subsection Formatted Output Functions + +This section describes how to call @code{printf} and related functions. +Prototypes for these functions are in the header file @file{stdio.h}. +Because these functions take a variable number of arguments, you +@emph{must} declare prototypes for them before using them. Of course, +the easiest way to make sure you have all the right prototypes is to +just include @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun int printf (const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +The @code{printf} function prints the optional arguments under the +control of the template string @var{template} to the stream +@code{stdout}. 
It returns the number of characters printed, or a +negative value if there was an output error. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int wprintf (const wchar_t *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +The @code{wprintf} function prints the optional arguments under the +control of the wide template string @var{template} to the stream +@code{stdout}. It returns the number of wide characters printed, or a +negative value if there was an output error. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int fprintf (FILE *@var{stream}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is just like @code{printf}, except that the output is +written to the stream @var{stream} instead of @code{stdout}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int fwprintf (FILE *@var{stream}, const wchar_t *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is just like @code{wprintf}, except that the output is +written to the stream @var{stream} instead of @code{stdout}. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int sprintf (char *@var{s}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is like @code{printf}, except that the output is stored in the character +array @var{s} instead of written to a stream. A null character is written +to mark the end of the string. + +The @code{sprintf} function returns the number of characters stored in +the array @var{s}, not including the terminating null character. 
+ +The behavior of this function is undefined if copying takes place +between objects that overlap---for example, if @var{s} is also given +as an argument to be printed under control of the @samp{%s} conversion. +@xref{Copying Strings and Arrays}. + +@strong{Warning:} The @code{sprintf} function can be @strong{dangerous} +because it can potentially output more characters than can fit in the +allocation size of the string @var{s}. Remember that the field width +given in a conversion specification is only a @emph{minimum} value. + +To avoid this problem, you can use @code{snprintf} or @code{asprintf}, +described below. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun int swprintf (wchar_t *@var{ws}, size_t @var{size}, const wchar_t *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is like @code{wprintf}, except that the output is stored in the +wide character array @var{ws} instead of written to a stream. A null +wide character is written to mark the end of the string. The @var{size} +argument specifies the maximum number of characters to produce. The +trailing null character is counted towards this limit, so you should +allocate at least @var{size} wide characters for the string @var{ws}. + +The return value is the number of characters generated for the given +input, excluding the trailing null. If not all output fits into the +provided buffer a negative value is returned. You should try again with +a bigger output string. @emph{Note:} this is different from how +@code{snprintf} handles this situation. + +Note that the corresponding narrow stream function takes fewer +parameters. @code{swprintf} in fact corresponds to the @code{snprintf} +function. Since the @code{sprintf} function can be dangerous and should +be avoided the @w{ISO C} committee refused to make the same mistake +again and decided to not define a function exactly corresponding to +@code{sprintf}. 
+@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int snprintf (char *@var{s}, size_t @var{size}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{snprintf} function is similar to @code{sprintf}, except that +the @var{size} argument specifies the maximum number of characters to +produce. The trailing null character is counted towards this limit, so +you should allocate at least @var{size} characters for the string @var{s}. +If @var{size} is zero, nothing, not even the null byte, shall be written and +@var{s} may be a null pointer. + +The return value is the number of characters which would be generated +for the given input, excluding the trailing null. If this value is +greater than or equal to @var{size}, not all characters from the result have +been stored in @var{s}. You should try again with a bigger output +string. Here is an example of doing this: + +@smallexample +@group +/* @r{Construct a message describing the value of a variable} + @r{whose name is @var{name} and whose value is @var{value}.} */ +char * +make_message (char *name, char *value) +@{ + /* @r{Guess we need no more than 100 chars of space.} */ + int size = 100; + char *buffer = (char *) xmalloc (size); + int nchars; +@end group +@group + if (buffer == NULL) + return NULL; + + /* @r{Try to print in the allocated space.} */ + nchars = snprintf (buffer, size, "value of %s is %s", + name, value); +@end group +@group + if (nchars >= size) + @{ + /* @r{Reallocate buffer now that we know + how much space is needed.} */ + size = nchars + 1; + buffer = (char *) xrealloc (buffer, size); + + if (buffer != NULL) + /* @r{Try again.} */ + snprintf (buffer, size, "value of %s is %s", + name, value); + @} + /* @r{The last call worked, return the string.} */ + return buffer; +@} +@end group +@end smallexample + +In practice, it is often easier just to use @code{asprintf}, below. 
+ +@strong{Attention:} In versions of @theglibc{} prior to 2.1 the +return value is the number of characters stored, not including the +terminating null; unless there was not enough space in @var{s} to +store the result in which case @code{-1} is returned. This was +changed in order to comply with the @w{ISO C99} standard. +@end deftypefun + +@node Dynamic Output +@subsection Dynamically Allocating Formatted Output + +The functions in this section do formatted output and place the results +in dynamically allocated memory. + +@comment stdio.h +@comment GNU +@deftypefun int asprintf (char **@var{ptr}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function is similar to @code{sprintf}, except that it dynamically +allocates a string (as with @code{malloc}; @pxref{Unconstrained +Allocation}) to hold the output, instead of putting the output in a +buffer you allocate in advance. The @var{ptr} argument should be the +address of a @code{char *} object, and a successful call to +@code{asprintf} stores a pointer to the newly allocated string at that +location. + +The return value is the number of characters stored in the new string, +not including the terminating null, or less than zero if an error +occurred. Usually this means that the buffer could not be allocated.
+ +Here is how to use @code{asprintf} to get the same result as the +@code{snprintf} example, but more easily: + +@smallexample +/* @r{Construct a message describing the value of a variable} + @r{whose name is @var{name} and whose value is @var{value}.} */ +char * +make_message (char *name, char *value) +@{ + char *result; + if (asprintf (&result, "value of %s is %s", name, value) < 0) + return NULL; + return result; +@} +@end smallexample +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int obstack_printf (struct obstack *@var{obstack}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +This function is similar to @code{asprintf}, except that it uses the +obstack @var{obstack} to allocate the space. @xref{Obstacks}. + +The characters are written onto the end of the current object. +To get at them, you must finish the object with @code{obstack_finish} +(@pxref{Growing Objects}).@refill +@end deftypefun + +@node Variable Arguments Output +@subsection Variable Arguments Output Functions + +The functions @code{vprintf} and friends are provided so that you can +define your own variadic @code{printf}-like functions that make use of +the same internals as the built-in formatted output functions. + +The most natural way to define such functions would be to use a language +construct to say, ``Call @code{printf} and pass this template plus all +of my arguments after the first five.'' But there is no way to do this +in C, and it would be hard to provide a way, since at the C language +level there is no way to tell how many arguments your function received. 
+ +Since that method is impossible, we provide alternative functions, the +@code{vprintf} series, which lets you pass a @code{va_list} to describe +``all of my arguments after the first five.'' + +When it is sufficient to define a macro rather than a real function, +the GNU C compiler provides a way to do this much more easily with macros. +For example: + +@smallexample +#define myprintf(a, b, c, d, e, rest...) \ + printf (mytemplate , ## rest) +@end smallexample + +@noindent +@xref{Variadic Macros,,, cpp, The C preprocessor}, for details. +But this is limited to macros, and does not apply to real functions at all. + +Before calling @code{vprintf} or the other functions listed in this +section, you @emph{must} call @code{va_start} (@pxref{Variadic +Functions}) to initialize a pointer to the variable arguments. Then you +can call @code{va_arg} to fetch the arguments that you want to handle +yourself. This advances the pointer past those arguments. + +Once your @code{va_list} pointer is pointing at the argument of your +choice, you are ready to call @code{vprintf}. That argument and all +subsequent arguments that were passed to your function are used by +@code{vprintf} along with the template that you specified separately. + +@strong{Portability Note:} The value of the @code{va_list} pointer is +undetermined after the call to @code{vprintf}, so you must not use +@code{va_arg} after you call @code{vprintf}. Instead, you should call +@code{va_end} to retire the pointer from service. You can call +@code{va_start} again and begin fetching the arguments from the start of +the variable argument list. (Alternatively, you can use @code{va_copy} +to make a copy of the @code{va_list} pointer before calling +@code{vfprintf}.) Calling @code{vprintf} does not destroy the argument +list of your function, merely the particular pointer that you passed to +it. + +Prototypes for these functions are declared in @file{stdio.h}. 
+@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun int vprintf (const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is similar to @code{printf} except that, instead of taking +a variable number of arguments directly, it takes an argument list +pointer @var{ap}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int vwprintf (const wchar_t *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is similar to @code{wprintf} except that, instead of taking +a variable number of arguments directly, it takes an argument list +pointer @var{ap}. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int vfprintf (FILE *@var{stream}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +@c Although vfprintf sets up a cleanup region to release the lock on the +@c output stream, it doesn't use it to release args_value or string in +@c case of cancellation. This doesn't make it unsafe, but cancelling it +@c may leak memory. The unguarded use of __printf_function_table is +@c also of concern for all callers. +@c _itoa ok +@c _udiv_qrnnd_preinv ok +@c group_number ok +@c _i18n_number_rewrite +@c __wctrans ok +@c __towctrans @mtslocale +@c __wcrtomb ok? dup below +@c outdigit_value ok +@c outdigitwc_value ok +@c outchar ok +@c outstring ok +@c PAD ok +@c __printf_fp @mtslocale @ascuheap @acsmem +@c __printf_fphex @mtslocale +@c __readonly_area +@c [GNU/Linux] fopen, strtoul, free +@c __strerror_r ok if no translation, check otherwise +@c __btowc ? 
gconv-modules +@c __wcrtomb ok (not using internal state) gconv-modules +@c ARGCHECK +@c UNBUFFERED_P (tested before taking the stream lock) +@c buffered_vfprintf ok +@c __find_spec(wc|mb) +@c read_int +@c __libc_use_alloca +@c process_arg +@c process_string_arg +@c extend_alloca +@c __parse_one_spec(wc|mb) +@c *__printf_arginfo_table unguarded +@c __printf_va_arg_table-> unguarded +@c *__printf_function_table unguarded +@c done_add +@c printf_unknown +@c outchar +@c _itoa_word +This is the equivalent of @code{fprintf} with the variable argument list +specified directly as for @code{vprintf}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int vfwprintf (FILE *@var{stream}, const wchar_t *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This is the equivalent of @code{fwprintf} with the variable argument list +specified directly as for @code{vwprintf}. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int vsprintf (char *@var{s}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is the equivalent of @code{sprintf} with the variable argument list +specified directly as for @code{vprintf}. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun int vswprintf (wchar_t *@var{ws}, size_t @var{size}, const wchar_t *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is the equivalent of @code{swprintf} with the variable argument list +specified directly as for @code{vwprintf}. 
+@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int vsnprintf (char *@var{s}, size_t @var{size}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is the equivalent of @code{snprintf} with the variable argument list +specified directly as for @code{vprintf}. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int vasprintf (char **@var{ptr}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{vasprintf} function is the equivalent of @code{asprintf} with the +variable argument list specified directly as for @code{vprintf}. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int obstack_vprintf (struct obstack *@var{obstack}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtsrace{:obstack} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c The obstack is not guarded by mutexes, it might be at an inconsistent +@c state within a signal handler, and it could be left at an +@c inconsistent state in case of cancellation. +The @code{obstack_vprintf} function is the equivalent of +@code{obstack_printf} with the variable argument list specified directly +as for @code{vprintf}.@refill +@end deftypefun + +Here's an example showing how you might use @code{vfprintf}. This is a +function that prints error messages to the stream @code{stderr}, along +with a prefix indicating the name of the program +(@pxref{Error Messages}, for a description of +@code{program_invocation_short_name}). + +@smallexample +@group +#include <stdio.h> +#include <stdarg.h> + +void +eprintf (const char *template, ...) 
+@{ + va_list ap; + extern char *program_invocation_short_name; + + fprintf (stderr, "%s: ", program_invocation_short_name); + va_start (ap, template); + vfprintf (stderr, template, ap); + va_end (ap); +@} +@end group +@end smallexample + +@noindent +You could call @code{eprintf} like this: + +@smallexample +eprintf ("file `%s' does not exist\n", filename); +@end smallexample + +In GNU C, there is a special construct you can use to let the compiler +know that a function uses a @code{printf}-style format string. Then it +can check the number and types of arguments in each call to the +function, and warn you when they do not match the format string. +For example, take this declaration of @code{eprintf}: + +@smallexample +void eprintf (const char *template, ...) + __attribute__ ((format (printf, 1, 2))); +@end smallexample + +@noindent +This tells the compiler that @code{eprintf} uses a format string like +@code{printf} (as opposed to @code{scanf}; @pxref{Formatted Input}); +the format string appears as the first argument; +and the arguments to satisfy the format begin with the second. +@xref{Function Attributes, , Declaring Attributes of Functions, +gcc.info, Using GNU CC}, for more information. + +@node Parsing a Template String +@subsection Parsing a Template String +@cindex parsing a template string + +You can use the function @code{parse_printf_format} to obtain +information about the number and types of arguments that are expected by +a given template string. This function permits interpreters that +provide interfaces to @code{printf} to avoid passing along invalid +arguments from the user's program, which could cause a crash. + +All the symbols described in this section are declared in the header +file @file{printf.h}. 
+ +@comment printf.h +@comment GNU +@deftypefun size_t parse_printf_format (const char *@var{template}, size_t @var{n}, int *@var{argtypes}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function returns information about the number and types of +arguments expected by the @code{printf} template string @var{template}. +The information is stored in the array @var{argtypes}; each element of +this array describes one argument. This information is encoded using +the various @samp{PA_} macros, listed below. + +The argument @var{n} specifies the number of elements in the array +@var{argtypes}. This is the maximum number of elements that +@code{parse_printf_format} will try to write. + +@code{parse_printf_format} returns the total number of arguments required +by @var{template}. If this number is greater than @var{n}, then the +information returned describes only the first @var{n} arguments. If you +want information about additional arguments, allocate a bigger +array and call @code{parse_printf_format} again. +@end deftypefun + +The argument types are encoded as a combination of a basic type and +modifier flag bits. + +@comment printf.h +@comment GNU +@deftypevr Macro int PA_FLAG_MASK +This macro is a bitmask for the type modifier flag bits. You can write +the expression @code{(argtypes[i] & PA_FLAG_MASK)} to extract just the +flag bits for an argument, or @code{(argtypes[i] & ~PA_FLAG_MASK)} to +extract just the basic type code. +@end deftypevr + +Here are symbolic constants that represent the basic types; they stand +for integer values. + +@vtable @code +@comment printf.h +@comment GNU +@item PA_INT +This specifies that the base type is @code{int}. + +@comment printf.h +@comment GNU +@item PA_CHAR +This specifies that the base type is @code{int}, cast to @code{char}. + +@comment printf.h +@comment GNU +@item PA_STRING +This specifies that the base type is @code{char *}, a null-terminated string. 
+ +@comment printf.h +@comment GNU +@item PA_POINTER +This specifies that the base type is @code{void *}, an arbitrary pointer. + +@comment printf.h +@comment GNU +@item PA_FLOAT +This specifies that the base type is @code{float}. + +@comment printf.h +@comment GNU +@item PA_DOUBLE +This specifies that the base type is @code{double}. + +@comment printf.h +@comment GNU +@item PA_LAST +You can define additional base types for your own programs as offsets +from @code{PA_LAST}. For example, if you have data types @samp{foo} +and @samp{bar} with their own specialized @code{printf} conversions, +you could define encodings for these types as: + +@smallexample +#define PA_FOO PA_LAST +#define PA_BAR (PA_LAST + 1) +@end smallexample +@end vtable + +Here are the flag bits that modify a basic type. They are combined with +the code for the basic type using inclusive-or. + +@vtable @code +@comment printf.h +@comment GNU +@item PA_FLAG_PTR +If this bit is set, it indicates that the encoded type is a pointer to +the base type, rather than an immediate value. +For example, @samp{PA_INT|PA_FLAG_PTR} represents the type @samp{int *}. + +@comment printf.h +@comment GNU +@item PA_FLAG_SHORT +If this bit is set, it indicates that the base type is modified with +@code{short}. (This corresponds to the @samp{h} type modifier.) + +@comment printf.h +@comment GNU +@item PA_FLAG_LONG +If this bit is set, it indicates that the base type is modified with +@code{long}. (This corresponds to the @samp{l} type modifier.) + +@comment printf.h +@comment GNU +@item PA_FLAG_LONG_LONG +If this bit is set, it indicates that the base type is modified with +@code{long long}. (This corresponds to the @samp{L} type modifier.) + +@comment printf.h +@comment GNU +@item PA_FLAG_LONG_DOUBLE +This is a synonym for @code{PA_FLAG_LONG_LONG}, used by convention with +a base type of @code{PA_DOUBLE} to indicate a type of @code{long double}. 
+@end vtable + +@ifinfo +For an example of using these facilities, see @ref{Example of Parsing}. +@end ifinfo + +@node Example of Parsing +@subsection Example of Parsing a Template String + +Here is an example of decoding argument types for a format string. We +assume this is part of an interpreter which contains arguments of type +@code{NUMBER}, @code{CHAR}, @code{STRING} and @code{STRUCTURE} (and +perhaps others which are not valid here). + +@smallexample +/* @r{Test whether the @var{nargs} specified objects} + @r{in the vector @var{args} are valid} + @r{for the format string @var{format}:} + @r{if so, return 1.} + @r{If not, return 0 after printing an error message.} */ + +int +validate_args (char *format, int nargs, OBJECT *args) +@{ + int *argtypes; + int nwanted; + + /* @r{Get the information about the arguments.} + @r{Each conversion specification must be at least two characters} + @r{long, so there cannot be more specifications than half the} + @r{length of the string.} */ + + argtypes = (int *) alloca (strlen (format) / 2 * sizeof (int)); + nwanted = parse_printf_format (string, nelts, argtypes); + + /* @r{Check the number of arguments.} */ + if (nwanted > nargs) + @{ + error ("too few arguments (at least %d required)", nwanted); + return 0; + @} + + /* @r{Check the C type wanted for each argument} + @r{and see if the object given is suitable.} */ + for (i = 0; i < nwanted; i++) + @{ + int wanted; + + if (argtypes[i] & PA_FLAG_PTR) + wanted = STRUCTURE; + else + switch (argtypes[i] & ~PA_FLAG_MASK) + @{ + case PA_INT: + case PA_FLOAT: + case PA_DOUBLE: + wanted = NUMBER; + break; + case PA_CHAR: + wanted = CHAR; + break; + case PA_STRING: + wanted = STRING; + break; + case PA_POINTER: + wanted = STRUCTURE; + break; + @} + if (TYPE (args[i]) != wanted) + @{ + error ("type mismatch for arg number %d", i); + return 0; + @} + @} + return 1; +@} +@end smallexample + +@node Customizing Printf +@section Customizing @code{printf} +@cindex customizing @code{printf} 
+@cindex defining new @code{printf} conversions +@cindex extending @code{printf} + +@Theglibc{} lets you define your own custom conversion specifiers +for @code{printf} template strings, to teach @code{printf} clever ways +to print the important data structures of your program. + +The way you do this is by registering the conversion with the function +@code{register_printf_function}; see @ref{Registering New Conversions}. +One of the arguments you pass to this function is a pointer to a handler +function that produces the actual output; see @ref{Defining the Output +Handler}, for information on how to write this function. + +You can also install a function that just returns information about the +number and type of arguments expected by the conversion specifier. +@xref{Parsing a Template String}, for information about this. + +The facilities of this section are declared in the header file +@file{printf.h}. + +@menu +* Registering New Conversions:: Using @code{register_printf_function} + to register a new output conversion. +* Conversion Specifier Options:: The handler must be able to get + the options specified in the + template when it is called. +* Defining the Output Handler:: Defining the handler and arginfo + functions that are passed as arguments + to @code{register_printf_function}. +* Printf Extension Example:: How to define a @code{printf} + handler function. +* Predefined Printf Handlers:: Predefined @code{printf} handlers. +@end menu + +@strong{Portability Note:} The ability to extend the syntax of +@code{printf} template strings is a GNU extension. ISO standard C has +nothing similar. + +@node Registering New Conversions +@subsection Registering New Conversions + +The function to register a new output conversion is +@code{register_printf_function}, declared in @file{printf.h}. 
+@pindex printf.h + +@comment printf.h +@comment GNU +@deftypefun int register_printf_function (int @var{spec}, printf_function @var{handler-function}, printf_arginfo_function @var{arginfo-function}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:printfext}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @aculock{}}} +@c This function is guarded by the global non-recursive libc lock, but +@c users of the variables it sets aren't, and those should be MT-Safe, +@c so we're ruling out the use of this extension with threads. Calling +@c it from a signal handler may self-deadlock, and cancellation may +@c leave the lock held, besides leaking allocated memory. +This function defines the conversion specifier character @var{spec}. +Thus, if @var{spec} is @code{'Y'}, it defines the conversion @samp{%Y}. +You can redefine the built-in conversions like @samp{%s}, but flag +characters like @samp{#} and type modifiers like @samp{l} can never be +used as conversions; calling @code{register_printf_function} for those +characters has no effect. It is advisable not to use lowercase letters, +since the ISO C standard warns that additional lowercase letters may be +standardized in future editions of the standard. + +The @var{handler-function} is the function called by @code{printf} and +friends when this conversion appears in a template string. +@xref{Defining the Output Handler}, for information about how to define +a function to pass as this argument. If you specify a null pointer, any +existing handler function for @var{spec} is removed. + +The @var{arginfo-function} is the function called by +@code{parse_printf_format} when this conversion appears in a +template string. @xref{Parsing a Template String}, for information +about this. + +@c The following is not true anymore. The `parse_printf_format' function +@c is now also called from `vfprintf' via `parse_one_spec'. 
+@c --drepper@gnu, 1996/11/14 +@c +@c Normally, you install both functions for a conversion at the same time, +@c but if you are never going to call @code{parse_printf_format}, you do +@c not need to define an arginfo function. + +@strong{Attention:} In @theglibc{} versions before 2.0 the +@var{arginfo-function} function did not need to be installed unless +the user used the @code{parse_printf_format} function. This has changed. +Now a call to any of the @code{printf} functions will call this +function when this format specifier appears in the format string. + +The return value is @code{0} on success, and @code{-1} on failure +(which occurs if @var{spec} is out of range). + +You can redefine the standard output conversions, but this is probably +not a good idea because of the potential for confusion. Library routines +written by other people could break if you do this. +@end deftypefun + +@node Conversion Specifier Options +@subsection Conversion Specifier Options + +If you define a meaning for @samp{%A}, what if the template contains +@samp{%+23A} or @samp{%-#A}? To implement a sensible meaning for these, +the handler when called needs to be able to get the options specified in +the template. + +Both the @var{handler-function} and @var{arginfo-function} accept an +argument that points to a @code{struct printf_info}, which contains +information about the options appearing in an instance of the conversion +specifier. This data type is declared in the header file +@file{printf.h}. +@pindex printf.h + +@comment printf.h +@comment GNU +@deftp {Type} {struct printf_info} +This structure is used to pass information about the options appearing +in an instance of a conversion specifier in a @code{printf} template +string to the handler and arginfo functions for that specifier. It +contains the following members: + +@table @code +@item int prec +This is the precision specified. The value is @code{-1} if no precision +was specified. 
If the precision was given as @samp{*}, the +@code{printf_info} structure passed to the handler function contains the +actual value retrieved from the argument list. But the structure passed +to the arginfo function contains a value of @code{INT_MIN}, since the +actual value is not known. + +@item int width +This is the minimum field width specified. The value is @code{0} if no +width was specified. If the field width was given as @samp{*}, the +@code{printf_info} structure passed to the handler function contains the +actual value retrieved from the argument list. But the structure passed +to the arginfo function contains a value of @code{INT_MIN}, since the +actual value is not known. + +@item wchar_t spec +This is the conversion specifier character specified. It's stored in +the structure so that you can register the same handler function for +multiple characters, but still have a way to tell them apart when the +handler function is called. + +@item unsigned int is_long_double +This is a boolean that is true if the @samp{L}, @samp{ll}, or @samp{q} +type modifier was specified. For integer conversions, this indicates +@code{long long int}, as opposed to @code{long double} for floating +point conversions. + +@item unsigned int is_char +This is a boolean that is true if the @samp{hh} type modifier was specified. + +@item unsigned int is_short +This is a boolean that is true if the @samp{h} type modifier was specified. + +@item unsigned int is_long +This is a boolean that is true if the @samp{l} type modifier was specified. + +@item unsigned int alt +This is a boolean that is true if the @samp{#} flag was specified. + +@item unsigned int space +This is a boolean that is true if the @samp{ } flag was specified. + +@item unsigned int left +This is a boolean that is true if the @samp{-} flag was specified. + +@item unsigned int showsign +This is a boolean that is true if the @samp{+} flag was specified. 
+ +@item unsigned int group +This is a boolean that is true if the @samp{'} flag was specified. + +@item unsigned int extra +This flag has a special meaning depending on the context. It could +be used freely by the user-defined handlers but when called from +the @code{printf} function this variable always contains the value +@code{0}. + +@item unsigned int wide +This flag is set if the stream is wide oriented. + +@item wchar_t pad +This is the character to use for padding the output to the minimum field +width. The value is @code{'0'} if the @samp{0} flag was specified, and +@code{' '} otherwise. +@end table +@end deftp + + +@node Defining the Output Handler +@subsection Defining the Output Handler + +Now let's look at how to define the handler and arginfo functions +which are passed as arguments to @code{register_printf_function}. + +@strong{Compatibility Note:} The interface changed in @theglibc{} +version 2.0. Previously the third argument was of type +@code{va_list *}. + +You should define your handler functions with a prototype like: + +@smallexample +int @var{function} (FILE *stream, const struct printf_info *info, + const void *const *args) +@end smallexample + +The @var{stream} argument passed to the handler function is the stream to +which it should write output. + +The @var{info} argument is a pointer to a structure that contains +information about the various options that were included with the +conversion in the template string. You should not modify this structure +inside your handler function. @xref{Conversion Specifier Options}, for +a description of this data structure. + +@c The following changes some time back. --drepper@gnu, 1996/11/14 +@c +@c The @code{ap_pointer} argument is used to pass the tail of the variable +@c argument list containing the values to be printed to your handler. 
+@c Unlike most other functions that can be passed an explicit variable +@c argument list, this is a @emph{pointer} to a @code{va_list}, rather than +@c the @code{va_list} itself. Thus, you should fetch arguments by +@c means of @code{va_arg (*ap_pointer, @var{type})}. +@c +@c (Passing a pointer here allows the function that calls your handler +@c function to update its own @code{va_list} variable to account for the +@c arguments that your handler processes. @xref{Variadic Functions}.) + +The @var{args} is a vector of pointers to the arguments data. +The number of arguments was determined by calling the argument +information function provided by the user. + +Your handler function should return a value just like @code{printf} +does: it should return the number of characters it has written, or a +negative value to indicate an error. + +@comment printf.h +@comment GNU +@deftp {Data Type} printf_function +This is the data type that a handler function should have. +@end deftp + +If you are going to use @w{@code{parse_printf_format}} in your +application, you must also define a function to pass as the +@var{arginfo-function} argument for each new conversion you install with +@code{register_printf_function}. + +You have to define these functions with a prototype like: + +@smallexample +int @var{function} (const struct printf_info *info, + size_t n, int *argtypes) +@end smallexample + +The return value from the function should be the number of arguments the +conversion expects. The function should also fill in no more than +@var{n} elements of the @var{argtypes} array with information about the +types of each of these arguments. This information is encoded using the +various @samp{PA_} macros. (You will notice that this is the same +calling convention @code{parse_printf_format} itself uses.) 
+ +@comment printf.h +@comment GNU +@deftp {Data Type} printf_arginfo_function +This type is used to describe functions that return information about +the number and type of arguments used by a conversion specifier. +@end deftp + +@node Printf Extension Example +@subsection @code{printf} Extension Example + +Here is an example showing how to define a @code{printf} handler function. +This program defines a data structure called a @code{Widget} and +defines the @samp{%W} conversion to print information about @w{@code{Widget *}} +arguments, including the pointer value and the name stored in the data +structure. The @samp{%W} conversion supports the minimum field width and +left-justification options, but ignores everything else. + +@smallexample +@include rprintf.c.texi +@end smallexample + +The output produced by this program looks like: + +@smallexample +|<Widget 0xffeffb7c: mywidget>| +| <Widget 0xffeffb7c: mywidget>| +|<Widget 0xffeffb7c: mywidget> | +@end smallexample + +@node Predefined Printf Handlers +@subsection Predefined @code{printf} Handlers + +@Theglibc{} also contains a concrete and useful application of the +@code{printf} handler extension. There are two functions available +which implement a special way to print floating-point numbers. + +@comment printf.h +@comment GNU +@deftypefun int printf_size (FILE *@var{fp}, const struct printf_info *@var{info}, const void *const *@var{args}) +@safety{@prelim{}@mtsafe{@mtsrace{:fp} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @acucorrupt{}}} +@c This is meant to be called by vfprintf, that should hold the lock on +@c the stream, but if this function is called directly, output will be +@c racy, besides the uses of the global locale object while other +@c threads may be changing it and the possibility of leaving the stream +@c object in an inconsistent state in case of cancellation.
+Print a given floating point number as for the format @code{%f} except +that there is a postfix character indicating the divisor for the +number to make this less than 1000. There are two possible divisors: +powers of 1024 or powers of 1000. Which one is used depends on the +format character specified when registering this handler. If the +character is of lower case, 1024 is used. For upper case characters, +1000 is used. + +The postfix tag corresponds to bytes, kilobytes, megabytes, gigabytes, +etc. The full table is: + +@ifinfo +@multitable {' '} {2^10 (1024)} {zetta} {Upper} {10^24 (1000)} +@item low @tab Multiplier @tab From @tab Upper @tab Multiplier +@item ' ' @tab 1 @tab @tab ' ' @tab 1 +@item k @tab 2^10 (1024) @tab kilo @tab K @tab 10^3 (1000) +@item m @tab 2^20 @tab mega @tab M @tab 10^6 +@item g @tab 2^30 @tab giga @tab G @tab 10^9 +@item t @tab 2^40 @tab tera @tab T @tab 10^12 +@item p @tab 2^50 @tab peta @tab P @tab 10^15 +@item e @tab 2^60 @tab exa @tab E @tab 10^18 +@item z @tab 2^70 @tab zetta @tab Z @tab 10^21 +@item y @tab 2^80 @tab yotta @tab Y @tab 10^24 +@end multitable +@end ifinfo +@iftex +@tex +\hbox to\hsize{\hfil\vbox{\offinterlineskip +\hrule +\halign{\strut#& \vrule#\tabskip=1em plus2em& {\tt#}\hfil& \vrule#& #\hfil& \vrule#& #\hfil& \vrule#& {\tt#}\hfil& \vrule#& #\hfil& \vrule#\tabskip=0pt\cr +\noalign{\hrule} +\omit&height2pt&\omit&&\omit&&\omit&&\omit&&\omit&\cr +&& \omit low && Multiplier && From && \omit Upper && Multiplier &\cr +\omit&height2pt&\omit&&\omit&&\omit&&\omit&&\omit&\cr +\noalign{\hrule} +&& {\tt\char32} && 1 && && {\tt\char32} && 1 &\cr +&& k && $2^{10} = 1024$ && kilo && K && $10^3 = 1000$ &\cr +&& m && $2^{20}$ && mega && M && $10^6$ &\cr +&& g && $2^{30}$ && giga && G && $10^9$ &\cr +&& t && $2^{40}$ && tera && T && $10^{12}$ &\cr +&& p && $2^{50}$ && peta && P && $10^{15}$ &\cr +&& e && $2^{60}$ && exa && E && $10^{18}$ &\cr +&& z && $2^{70}$ && zetta && Z && $10^{21}$ &\cr +&& y && $2^{80}$ && yotta && Y &&
$10^{24}$ &\cr +\noalign{\hrule}}}\hfil} +@end tex +@end iftex + +The default precision is 3, i.e., 1024 is printed with a lower-case +format character as if it were @code{%.3fk} and will yield @code{1.000k}. +@end deftypefun + +Due to the requirements of @code{register_printf_function} we must also +provide the function which returns information about the arguments. + +@comment printf.h +@comment GNU +@deftypefun int printf_size_info (const struct printf_info *@var{info}, size_t @var{n}, int *@var{argtypes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function will return in @var{argtypes} the information about the +used parameters in the way the @code{vfprintf} implementation expects +it. The format always takes one argument. +@end deftypefun + +To use these functions both functions must be registered with a call like + +@smallexample +register_printf_function ('B', printf_size, printf_size_info); +@end smallexample + +Here we register the functions to print numbers as powers of 1000 since +the format character @code{'B'} is an upper-case character. If we +would additionally use @code{'b'} in a line like + +@smallexample +register_printf_function ('b', printf_size, printf_size_info); +@end smallexample + +@noindent +we could also print using a power of 1024. Please note that all that is +different in these two lines is the format specifier. The +@code{printf_size} function knows about the difference between lower and upper +case format specifiers. + +The use of @code{'B'} and @code{'b'} is no coincidence. Rather it is +the preferred way to use this functionality since it is available on +some other systems which also use format specifiers. 
+ +@node Formatted Input +@section Formatted Input + +@cindex formatted input from a stream +@cindex reading from a stream, formatted +@cindex format string, for @code{scanf} +@cindex template, for @code{scanf} +The functions described in this section (@code{scanf} and related +functions) provide facilities for formatted input analogous to the +formatted output facilities. These functions provide a mechanism for +reading arbitrary values under the control of a @dfn{format string} or +@dfn{template string}. + +@menu +* Formatted Input Basics:: Some basics to get you started. +* Input Conversion Syntax:: Syntax of conversion specifications. +* Table of Input Conversions:: Summary of input conversions and what they do. +* Numeric Input Conversions:: Details of conversions for reading numbers. +* String Input Conversions:: Details of conversions for reading strings. +* Dynamic String Input:: String conversions that @code{malloc} the buffer. +* Other Input Conversions:: Details of miscellaneous other conversions. +* Formatted Input Functions:: Descriptions of the actual functions. +* Variable Arguments Input:: @code{vscanf} and friends. +@end menu + +@node Formatted Input Basics +@subsection Formatted Input Basics + +Calls to @code{scanf} are superficially similar to calls to +@code{printf} in that arbitrary arguments are read under the control of +a template string. While the syntax of the conversion specifications in +the template is very similar to that for @code{printf}, the +interpretation of the template is oriented more towards free-format +input and simple pattern matching, rather than fixed-field formatting. +For example, most @code{scanf} conversions skip over any amount of +``white space'' (including spaces, tabs, and newlines) in the input +file, and there is no concept of precision for the numeric input +conversions as there is for the corresponding output conversions. 
+Ordinarily, non-whitespace characters in the template are expected to +match characters in the input stream exactly, but a matching failure is +distinct from an input error on the stream. +@cindex conversion specifications (@code{scanf}) + +Another area of difference between @code{scanf} and @code{printf} is +that you must remember to supply pointers rather than immediate values +as the optional arguments to @code{scanf}; the values that are read are +stored in the objects that the pointers point to. Even experienced +programmers tend to forget this occasionally, so if your program is +getting strange errors that seem to be related to @code{scanf}, you +might want to double-check this. + +When a @dfn{matching failure} occurs, @code{scanf} returns immediately, +leaving the first non-matching character as the next character to be +read from the stream. The normal return value from @code{scanf} is the +number of values that were assigned, so you can use this to determine if +a matching error happened before all the expected values were read. +@cindex matching failure, in @code{scanf} + +The @code{scanf} function is typically used for things like reading in +the contents of tables. For example, here is a function that uses +@code{scanf} to initialize an array of @code{double}: + +@smallexample +void +readarray (double *array, int n) +@{ + int i; + for (i=0; i<n; i++) + if (scanf (" %lf", &(array[i])) != 1) + invalid_input_error (); +@} +@end smallexample + +The formatted input functions are not used as frequently as the +formatted output functions. Partly, this is because it takes some care +to use them properly. Another reason is that it is difficult to recover +from a matching error. + +If you are trying to read input that doesn't match a single, fixed +pattern, you may be better off using a tool such as Flex to generate a +lexical scanner, or Bison to generate a parser, rather than using +@code{scanf}. 
For more information about these tools, see @ref{Top, , , +flex.info, Flex: The Lexical Scanner Generator}, and @ref{Top, , , +bison.info, The Bison Reference Manual}. + +@node Input Conversion Syntax +@subsection Input Conversion Syntax + +A @code{scanf} template string is a string that contains ordinary +multibyte characters interspersed with conversion specifications that +start with @samp{%}. + +Any whitespace character (as defined by the @code{isspace} function; +@pxref{Classification of Characters}) in the template causes any number +of whitespace characters in the input stream to be read and discarded. +The whitespace characters that are matched need not be exactly the same +whitespace characters that appear in the template string. For example, +write @samp{ , } in the template to recognize a comma with optional +whitespace before and after. + +Other characters in the template string that are not part of conversion +specifications must match characters in the input stream exactly; if +this is not the case, a matching failure occurs. + +The conversion specifications in a @code{scanf} template string +have the general form: + +@smallexample +% @var{flags} @var{width} @var{type} @var{conversion} +@end smallexample + +In more detail, an input conversion specification consists of an initial +@samp{%} character followed in sequence by: + +@itemize @bullet +@item +An optional @dfn{flag character} @samp{*}, which says to ignore the text +read for this specification. When @code{scanf} finds a conversion +specification that uses this flag, it reads input as directed by the +rest of the conversion specification, but it discards this input, does +not use a pointer argument, and does not increment the count of +successful assignments. +@cindex flag character (@code{scanf}) + +@item +An optional flag character @samp{a} (valid with string conversions only) +which requests allocation of a buffer long enough to store the string in. +(This is a GNU extension.) 
+@xref{Dynamic String Input}. + +@item +An optional decimal integer that specifies the @dfn{maximum field +width}. Reading of characters from the input stream stops either when +this maximum is reached or when a non-matching character is found, +whichever happens first. Most conversions discard initial whitespace +characters (those that don't are explicitly documented), and these +discarded characters don't count towards the maximum field width. +String input conversions store a null character to mark the end of the +input; the maximum field width does not include this terminator. +@cindex maximum field width (@code{scanf}) + +@item +An optional @dfn{type modifier character}. For example, you can +specify a type modifier of @samp{l} with integer conversions such as +@samp{%d} to specify that the argument is a pointer to a @code{long int} +rather than a pointer to an @code{int}. +@cindex type modifier character (@code{scanf}) + +@item +A character that specifies the conversion to be applied. +@end itemize + +The exact options that are permitted and how they are interpreted vary +between the different conversion specifiers. See the descriptions of the +individual conversions for information about the particular options that +they allow. + +With the @samp{-Wformat} option, the GNU C compiler checks calls to +@code{scanf} and related functions. It examines the format string and +verifies that the correct number and types of arguments are supplied. +There is also a GNU C syntax to tell the compiler that a function you +write uses a @code{scanf}-style format string. +@xref{Function Attributes, , Declaring Attributes of Functions, +gcc.info, Using GNU CC}, for more information. + +@node Table of Input Conversions +@subsection Table of Input Conversions +@cindex input conversions, for @code{scanf} + +Here is a table that summarizes the various conversion specifications: + +@table @asis +@item @samp{%d} +Matches an optionally signed integer written in decimal. 
@xref{Numeric +Input Conversions}. + +@item @samp{%i} +Matches an optionally signed integer in any of the formats that the C +language defines for specifying an integer constant. @xref{Numeric +Input Conversions}. + +@item @samp{%o} +Matches an unsigned integer written in octal radix. +@xref{Numeric Input Conversions}. + +@item @samp{%u} +Matches an unsigned integer written in decimal radix. +@xref{Numeric Input Conversions}. + +@item @samp{%x}, @samp{%X} +Matches an unsigned integer written in hexadecimal radix. +@xref{Numeric Input Conversions}. + +@item @samp{%e}, @samp{%f}, @samp{%g}, @samp{%E}, @samp{%G} +Matches an optionally signed floating-point number. @xref{Numeric Input +Conversions}. + +@item @samp{%s} + +Matches a string containing only non-whitespace characters. +@xref{String Input Conversions}. The presence of the @samp{l} modifier +determines whether the output is stored as a wide character string or a +multibyte string. If @samp{%s} is used in a wide character function the +string is converted as with multiple calls to @code{wcrtomb} into a +multibyte string. This means that the buffer must provide room for +@code{MB_CUR_MAX} bytes for each wide character read. In case +@samp{%ls} is used in a multibyte function the result is converted into +wide characters as with multiple calls of @code{mbrtowc} before being +stored in the user provided buffer. + +@item @samp{%S} +This is an alias for @samp{%ls} which is supported for compatibility +with the Unix standard. + +@item @samp{%[} +Matches a string of characters that belong to a specified set. +@xref{String Input Conversions}. The presence of the @samp{l} modifier +determines whether the output is stored as a wide character string or a +multibyte string. If @samp{%[} is used in a wide character function the +string is converted as with multiple calls to @code{wcrtomb} into a +multibyte string. This means that the buffer must provide room for +@code{MB_CUR_MAX} bytes for each wide character read. 
In case +@samp{%l[} is used in a multibyte function the result is converted into +wide characters as with multiple calls of @code{mbrtowc} before being +stored in the user provided buffer. + +@item @samp{%c} +Matches a string of one or more characters; the number of characters +read is controlled by the maximum field width given for the conversion. +@xref{String Input Conversions}. + +If @samp{%c} is used in a wide stream function the read value is +converted from a wide character to the corresponding multibyte character +before storing it. Note that this conversion can produce more than one +byte of output and therefore the provided buffer must be large enough for up +to @code{MB_CUR_MAX} bytes for each character. If @samp{%lc} is used in +a multibyte function the input is treated as a multibyte sequence (and +not bytes) and the result is converted as with calls to @code{mbrtowc}. + +@item @samp{%C} +This is an alias for @samp{%lc} which is supported for compatibility +with the Unix standard. + +@item @samp{%p} +Matches a pointer value in the same implementation-defined format used +by the @samp{%p} output conversion for @code{printf}. @xref{Other Input +Conversions}. + +@item @samp{%n} +This conversion doesn't read any characters; it records the number of +characters read so far by this call. @xref{Other Input Conversions}. + +@item @samp{%%} +This matches a literal @samp{%} character in the input stream. No +corresponding argument is used. @xref{Other Input Conversions}. +@end table + +If the syntax of a conversion specification is invalid, the behavior is +undefined. If there aren't enough function arguments provided to supply +addresses for all the conversion specifications in the template strings +that perform assignments, or if the arguments are not of the correct +types, the behavior is also undefined. On the other hand, extra +arguments are simply ignored. 
+ +@node Numeric Input Conversions +@subsection Numeric Input Conversions + +This section describes the @code{scanf} conversions for reading numeric +values. + +The @samp{%d} conversion matches an optionally signed integer in decimal +radix. The syntax that is recognized is the same as that for the +@code{strtol} function (@pxref{Parsing of Integers}) with the value +@code{10} for the @var{base} argument. + +The @samp{%i} conversion matches an optionally signed integer in any of +the formats that the C language defines for specifying an integer +constant. The syntax that is recognized is the same as that for the +@code{strtol} function (@pxref{Parsing of Integers}) with the value +@code{0} for the @var{base} argument. (You can print integers in this +syntax with @code{printf} by using the @samp{#} flag character with the +@samp{%x}, @samp{%o}, or @samp{%d} conversion. @xref{Integer Conversions}.) + +For example, any of the strings @samp{10}, @samp{0xa}, or @samp{012} +could be read in as integers under the @samp{%i} conversion. Each of +these specifies a number with decimal value @code{10}. + +The @samp{%o}, @samp{%u}, and @samp{%x} conversions match unsigned +integers in octal, decimal, and hexadecimal radices, respectively. The +syntax that is recognized is the same as that for the @code{strtoul} +function (@pxref{Parsing of Integers}) with the appropriate value +(@code{8}, @code{10}, or @code{16}) for the @var{base} argument. + +The @samp{%X} conversion is identical to the @samp{%x} conversion. They +both permit either uppercase or lowercase letters to be used as digits. + +The default type of the corresponding argument for the @code{%d} and +@code{%i} conversions is @code{int *}, and @code{unsigned int *} for the +other integer conversions. You can use the following type modifiers to +specify other sizes of integer: + +@table @samp +@item hh +Specifies that the argument is a @code{signed char *} or @code{unsigned +char *}. 
+ +This modifier was introduced in @w{ISO C99}. + +@item h +Specifies that the argument is a @code{short int *} or @code{unsigned +short int *}. + +@item j +Specifies that the argument is an @code{intmax_t *} or @code{uintmax_t *}. + +This modifier was introduced in @w{ISO C99}. + +@item l +Specifies that the argument is a @code{long int *} or @code{unsigned +long int *}. Two @samp{l} characters is like the @samp{L} modifier, below. + +If used with @samp{%c} or @samp{%s} the corresponding parameter is +considered as a pointer to a wide character or wide character string +respectively. This use of @samp{l} was introduced in @w{Amendment 1} to +@w{ISO C90}. + +@need 100 +@item ll +@itemx L +@itemx q +Specifies that the argument is a @code{long long int *} or @code{unsigned long long int *}. (The @code{long long} type is an extension supported by the +GNU C compiler. For systems that don't provide extra-long integers, this +is the same as @code{long int}.) + +The @samp{q} modifier is another name for the same thing, which comes +from 4.4 BSD; a @w{@code{long long int}} is sometimes called a ``quad'' +@code{int}. + +@item t +Specifies that the argument is a @code{ptrdiff_t *}. + +This modifier was introduced in @w{ISO C99}. + +@item z +Specifies that the argument is a @code{size_t *}. + +This modifier was introduced in @w{ISO C99}. +@end table + +All of the @samp{%e}, @samp{%f}, @samp{%g}, @samp{%E}, and @samp{%G} +input conversions are interchangeable. They all match an optionally +signed floating point number, in the same syntax as for the +@code{strtod} function (@pxref{Parsing of Floats}). + +For the floating-point input conversions, the default argument type is +@code{float *}.
(This is different from the corresponding output +conversions, where the default type is @code{double}; remember that +@code{float} arguments to @code{printf} are converted to @code{double} +by the default argument promotions, but @code{float *} arguments are +not promoted to @code{double *}.) You can specify other sizes of float +using these type modifiers: + +@table @samp +@item l +Specifies that the argument is of type @code{double *}. + +@item L +Specifies that the argument is of type @code{long double *}. +@end table + +For all the above number parsing formats there is an additional optional +flag @samp{'}. When this flag is given the @code{scanf} function +expects the number represented in the input string to be formatted +according to the grouping rules of the currently selected locale +(@pxref{General Numeric}). + +If the @code{"C"} or @code{"POSIX"} locale is selected there is no +difference. But for a locale which specifies values for the appropriate +fields in the locale the input must have the correct form in the input. +Otherwise the longest prefix with a correct form is processed. + +@node String Input Conversions +@subsection String Input Conversions + +This section describes the @code{scanf} input conversions for reading +string and character values: @samp{%s}, @samp{%S}, @samp{%[}, @samp{%c}, +and @samp{%C}. + +You have two options for how to receive the input from these +conversions: + +@itemize @bullet +@item +Provide a buffer to store it in. This is the default. You should +provide an argument of type @code{char *} or @code{wchar_t *} (the +latter if the @samp{l} modifier is present). + +@strong{Warning:} To make a robust program, you must make sure that the +input (plus its terminating null) cannot possibly exceed the size of the +buffer you provide. In general, the only way to do this is to specify a +maximum field width one less than the buffer size. 
@strong{If you +provide the buffer, always specify a maximum field width to prevent +overflow.} + +@item +Ask @code{scanf} to allocate a big enough buffer, by specifying the +@samp{a} flag character. This is a GNU extension. You should provide +an argument of type @code{char **} for the buffer address to be stored +in. @xref{Dynamic String Input}. +@end itemize + +The @samp{%c} conversion is the simplest: it matches a fixed number of +characters, always. The maximum field width says how many characters to +read; if you don't specify the maximum, the default is 1. This +conversion doesn't append a null character to the end of the text it +reads. It also does not skip over initial whitespace characters. It +reads precisely the next @var{n} characters, and fails if it cannot get +that many. Since there is always a maximum field width with @samp{%c} +(whether specified, or 1 by default), you can always prevent overflow by +making the buffer long enough. +@comment Is character == byte here??? --drepper + +If the format is @samp{%lc} or @samp{%C} the function stores wide +characters which are converted using the conversion determined at the +time the stream was opened from the external byte stream. The number of +bytes read from the medium is limited by @code{MB_CUR_MAX * @var{n}} but +at most @var{n} wide characters get stored in the output string. + +The @samp{%s} conversion matches a string of non-whitespace characters. +It skips and discards initial whitespace, but stops when it encounters +more whitespace after having read something. It stores a null character +at the end of the text that it reads. + +For example, reading the input: + +@smallexample + hello, world +@end smallexample + +@noindent +with the conversion @samp{%10c} produces @code{" hello, wo"}, but +reading the same input with the conversion @samp{%10s} produces +@code{"hello,"}.
+ +@strong{Warning:} If you do not specify a field width for @samp{%s}, +then the number of characters read is limited only by where the next +whitespace character appears. This almost certainly means that invalid +input can make your program crash---which is a bug. + +The @samp{%ls} and @samp{%S} formats are handled just like @samp{%s} +except that the external byte sequence is converted using the conversion +associated with the stream to wide characters with their own encoding. +A width or precision specified with the format does not directly determine +how many bytes are read from the stream since it measures wide +characters. But an upper limit can be computed by multiplying the value +of the width or precision by @code{MB_CUR_MAX}. + +To read in characters that belong to an arbitrary set of your choice, +use the @samp{%[} conversion. You specify the set between the @samp{[} +character and a following @samp{]} character, using the same syntax used +in regular expressions for explicit sets of characters. As special cases: + +@itemize @bullet +@item +A literal @samp{]} character can be specified as the first character +of the set. + +@item +An embedded @samp{-} character (that is, one that is not the first or +last character of the set) is used to specify a range of characters. + +@item +If a caret character @samp{^} immediately follows the initial @samp{[}, +then the set of allowed input characters is everything @emph{except} +the characters listed. +@end itemize + +The @samp{%[} conversion does not skip over initial whitespace +characters. + +Note that the @dfn{character class} syntax available in character sets +that appear inside regular expressions (such as @samp{[:alpha:]}) is +@emph{not} available in the @samp{%[} conversion. + +Here are some examples of @samp{%[} conversions and what they mean: + +@table @samp +@item %25[1234567890] +Matches a string of up to 25 digits. + +@item %25[][] +Matches a string of up to 25 square brackets.
+ +@item %25[^ \f\n\r\t\v] +Matches a string up to 25 characters long that doesn't contain any of +the standard whitespace characters. This is slightly different from +@samp{%s}, because if the input begins with a whitespace character, +@samp{%[} reports a matching failure while @samp{%s} simply discards the +initial whitespace. + +@item %25[a-z] +Matches up to 25 lowercase characters. +@end table + +As for @samp{%c} and @samp{%s} the @samp{%[} format is also modified to +produce wide characters if the @samp{l} modifier is present. Everything that +is said about @samp{%ls} above is true for @samp{%l[}. + +One more reminder: the @samp{%s} and @samp{%[} conversions are +@strong{dangerous} if you don't specify a maximum width or use the +@samp{a} flag, because input too long would overflow whatever buffer you +have provided for it. No matter how long your buffer is, a user could +supply input that is longer. A well-written program reports invalid +input with a comprehensible error message, not with a crash. + +@node Dynamic String Input +@subsection Dynamically Allocating String Conversions + +A GNU extension to formatted input lets you safely read a string with no +maximum size. Using this feature, you don't supply a buffer; instead, +@code{scanf} allocates a buffer big enough to hold the data and gives +you its address. To use this feature, write @samp{a} as a flag +character, as in @samp{%as} or @samp{%a[0-9a-z]}. + +The pointer argument you supply for where to store the input should have +type @code{char **}. The @code{scanf} function allocates a buffer and +stores its address in the word that the argument points to. You should +free the buffer with @code{free} when you no longer need it. + +Here is an example of using the @samp{a} flag with the @samp{%[@dots{}]} +conversion specification to read a ``variable assignment'' of the form +@samp{@var{variable} = @var{value}}.
+ +@smallexample +@{ + char *variable, *value; + + if (2 > scanf ("%a[a-zA-Z0-9] = %a[^\n]\n", + &variable, &value)) + @{ + invalid_input_error (); + return 0; + @} + + @dots{} +@} +@end smallexample + +@node Other Input Conversions +@subsection Other Input Conversions + +This section describes the miscellaneous input conversions. + +The @samp{%p} conversion is used to read a pointer value. It recognizes +the same syntax used by the @samp{%p} output conversion for +@code{printf} (@pxref{Other Output Conversions}); that is, a hexadecimal +number just as the @samp{%x} conversion accepts. The corresponding +argument should be of type @code{void **}; that is, the address of a +place to store a pointer. + +The resulting pointer value is not guaranteed to be valid if it was not +originally written during the same program execution that reads it in. + +The @samp{%n} conversion produces the number of characters read so far +by this call. The corresponding argument should be of type @code{int *}. +This conversion works in the same way as the @samp{%n} conversion for +@code{printf}; see @ref{Other Output Conversions}, for an example. + +The @samp{%n} conversion is the only mechanism for determining the +success of literal matches or conversions with suppressed assignments. +If the @samp{%n} follows the locus of a matching failure, then no value +is stored for it since @code{scanf} returns before processing the +@samp{%n}. If you store @code{-1} in that argument slot before calling +@code{scanf}, the presence of @code{-1} after @code{scanf} indicates an +error occurred before the @samp{%n} was reached. + +Finally, the @samp{%%} conversion matches a literal @samp{%} character +in the input stream, without using an argument. This conversion does +not permit any flags, field width, or type modifier to be specified. + +@node Formatted Input Functions +@subsection Formatted Input Functions + +Here are the descriptions of the functions for performing formatted +input. 
+Prototypes for these functions are in the header file @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun int scanf (const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +The @code{scanf} function reads formatted input from the stream +@code{stdin} under the control of the template string @var{template}. +The optional arguments are pointers to the places which receive the +resulting values. + +The return value is normally the number of successful assignments. If +an end-of-file condition is detected before any matches are performed, +including matches against whitespace and literal characters in the +template, then @code{EOF} is returned. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int wscanf (const wchar_t *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +The @code{wscanf} function reads formatted input from the stream +@code{stdin} under the control of the template string @var{template}. +The optional arguments are pointers to the places which receive the +resulting values. + +The return value is normally the number of successful assignments. If +an end-of-file condition is detected before any matches are performed, +including matches against whitespace and literal characters in the +template, then @code{EOF} is returned. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int fscanf (FILE *@var{stream}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is just like @code{scanf}, except that the input is read +from the stream @var{stream} instead of @code{stdin}.
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int fwscanf (FILE *@var{stream}, const wchar_t *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is just like @code{wscanf}, except that the input is read +from the stream @var{stream} instead of @code{stdin}. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int sscanf (const char *@var{s}, const char *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is like @code{scanf}, except that the characters are taken from the +null-terminated string @var{s} instead of from a stream. Reaching the +end of the string is treated as an end-of-file condition. + +The behavior of this function is undefined if copying takes place +between objects that overlap---for example, if @var{s} is also given +as an argument to receive a string read under control of the @samp{%s}, +@samp{%S}, or @samp{%[} conversion. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int swscanf (const wchar_t *@var{ws}, const wchar_t *@var{template}, @dots{}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is like @code{wscanf}, except that the characters are taken from the +null-terminated string @var{ws} instead of from a stream. Reaching the +end of the string is treated as an end-of-file condition. + +The behavior of this function is undefined if copying takes place +between objects that overlap---for example, if @var{ws} is also given as +an argument to receive a string read under control of the @samp{%s}, +@samp{%S}, or @samp{%[} conversion. 
+@end deftypefun + +@node Variable Arguments Input +@subsection Variable Arguments Input Functions + +The functions @code{vscanf} and friends are provided so that you can +define your own variadic @code{scanf}-like functions that make use of +the same internals as the built-in formatted input functions. +These functions are analogous to the @code{vprintf} series of output +functions. @xref{Variable Arguments Output}, for important +information on how to use them. + +@strong{Portability Note:} The functions listed in this section were +introduced in @w{ISO C99} and were previously available as GNU extensions. + +@comment stdio.h +@comment ISO +@deftypefun int vscanf (const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is similar to @code{scanf}, but instead of taking +a variable number of arguments directly, it takes an argument list +pointer @var{ap} of type @code{va_list} (@pxref{Variadic Functions}). +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int vwscanf (const wchar_t *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This function is similar to @code{wscanf}, but instead of taking +a variable number of arguments directly, it takes an argument list +pointer @var{ap} of type @code{va_list} (@pxref{Variadic Functions}). +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int vfscanf (FILE *@var{stream}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This is the equivalent of @code{fscanf} with the variable argument list +specified directly as for @code{vscanf}. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int vfwscanf (FILE *@var{stream}, const wchar_t *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acsmem{} @aculock{} @acucorrupt{}}} +This is the equivalent of @code{fwscanf} with the variable argument list +specified directly as for @code{vwscanf}. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int vsscanf (const char *@var{s}, const char *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is the equivalent of @code{sscanf} with the variable argument list +specified directly as for @code{vscanf}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int vswscanf (const wchar_t *@var{s}, const wchar_t *@var{template}, va_list @var{ap}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This is the equivalent of @code{swscanf} with the variable argument list +specified directly as for @code{vwscanf}. +@end deftypefun + +In GNU C, there is a special construct you can use to let the compiler +know that a function uses a @code{scanf}-style format string. Then it +can check the number and types of arguments in each call to the +function, and warn you when they do not match the format string. +For details, see @ref{Function Attributes, , Declaring Attributes of Functions, +gcc.info, Using GNU CC}. + +@node EOF and Errors +@section End-Of-File and Errors + +@cindex end of file, on a stream +Many of the functions described in this chapter return the value of the +macro @code{EOF} to indicate unsuccessful completion of the operation. +Since @code{EOF} is used to report both end of file and random errors, +it's often better to use the @code{feof} function to check explicitly +for end of file and @code{ferror} to check for errors. 
These functions +check indicators that are part of the internal state of the stream +object, indicators set if the appropriate condition was detected by a +previous I/O operation on that stream. + +@comment stdio.h +@comment ISO +@deftypevr Macro int EOF +This macro is an integer value that is returned by a number of narrow +stream functions to indicate an end-of-file condition, or some other +error situation. With @theglibc{}, @code{EOF} is @code{-1}. In +other libraries, its value may be some other negative number. + +This symbol is declared in @file{stdio.h}. +@end deftypevr + +@comment wchar.h +@comment ISO +@deftypevr Macro int WEOF +This macro is an integer value that is returned by a number of wide +stream functions to indicate an end-of-file condition, or some other +error situation. With @theglibc{}, @code{WEOF} is @code{-1}. In +other libraries, its value may be some other negative number. + +This symbol is declared in @file{wchar.h}. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypefun int feof (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +The @code{feof} function returns nonzero if and only if the end-of-file +indicator for the stream @var{stream} is set. + +This symbol is declared in @file{stdio.h}. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int feof_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c There isn't much of a thread unsafety risk in reading a flag word and +@c testing a bit in it. +The @code{feof_unlocked} function is equivalent to the @code{feof} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. + +This symbol is declared in @file{stdio.h}. 
+@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int ferror (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +The @code{ferror} function returns nonzero if and only if the error +indicator for the stream @var{stream} is set, indicating that an error +has occurred on a previous operation on the stream. + +This symbol is declared in @file{stdio.h}. +@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun int ferror_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{ferror_unlocked} function is equivalent to the @code{ferror} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. + +This symbol is declared in @file{stdio.h}. +@end deftypefun + +In addition to setting the error indicator associated with the stream, +the functions that operate on streams also set @code{errno} in the same +way as the corresponding low-level functions that operate on file +descriptors. For example, all of the functions that perform output to a +stream---such as @code{fputc}, @code{printf}, and @code{fflush}---are +implemented in terms of @code{write}, and all of the @code{errno} error +conditions defined for @code{write} are meaningful for these functions. +For more information about the descriptor-level I/O functions, see +@ref{Low-Level I/O}. + +@node Error Recovery +@section Recovering from errors + +You may explicitly clear the error and EOF flags with the @code{clearerr} +function. + +@comment stdio.h +@comment ISO +@deftypefun void clearerr (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{}}} +This function clears the end-of-file and error indicators for the +stream @var{stream}. + +The file positioning functions (@pxref{File Positioning}) also clear the +end-of-file indicator for the stream. 
+@end deftypefun + +@comment stdio.h +@comment GNU +@deftypefun void clearerr_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@assafe{}@acsafe{}} +The @code{clearerr_unlocked} function is equivalent to the @code{clearerr} +function except that it does not implicitly lock the stream. + +This function is a GNU extension. +@end deftypefun + +Note that it is @emph{not} correct to just clear the error flag and retry +a failed stream operation. After a failed write, any number of +characters since the last buffer flush may have been committed to the +file, while some buffered data may have been discarded. Merely retrying +can thus cause lost or repeated data. + +A failed read may leave the file pointer in an inappropriate position for +a second try. In both cases, you should seek to a known position before +retrying. + +Most errors that can happen are not recoverable --- a second try will +always fail again in the same way. So usually it is best to give up and +report the error to the user, rather than install complicated recovery +logic. + +One important exception is @code{EINTR} (@pxref{Interrupted Primitives}). +Many stream I/O implementations will treat it as an ordinary error, which +can be quite inconvenient. You can avoid this hassle by installing all +signals with the @code{SA_RESTART} flag. + +For similar reasons, setting nonblocking I/O on a stream's file +descriptor is not usually advisable. + +@node Binary Streams +@section Text and Binary Streams + +@gnusystems{} and other POSIX-compatible operating systems organize all +files as uniform sequences of characters. However, some other systems +make a distinction between files containing text and files containing +binary data, and the input and output facilities of @w{ISO C} provide for +this distinction. This section tells you how to write programs portable +to such systems. 
+ +@cindex text stream +@cindex binary stream +When you open a stream, you can specify either a @dfn{text stream} or a +@dfn{binary stream}. You indicate that you want a binary stream by +specifying the @samp{b} modifier in the @var{opentype} argument to +@code{fopen}; see @ref{Opening Streams}. Without this +option, @code{fopen} opens the file as a text stream. + +Text and binary streams differ in several ways: + +@itemize @bullet +@item +The data read from a text stream is divided into @dfn{lines} which are +terminated by newline (@code{'\n'}) characters, while a binary stream is +simply a long series of characters. A text stream might on some systems +fail to handle lines more than 254 characters long (including the +terminating newline character). +@cindex lines (in a text file) + +@item +On some systems, text files can contain only printing characters, +horizontal tab characters, and newlines, and so text streams may not +support other characters. However, binary streams can handle any +character value. + +@item +Space characters that are written immediately preceding a newline +character in a text stream may disappear when the file is read in again. + +@item +More generally, there need not be a one-to-one mapping between +characters that are read from or written to a text stream, and the +characters in the actual file. +@end itemize + +Since a binary stream is always more capable and more predictable than a +text stream, you might wonder what purpose text streams serve. Why not +simply always use binary streams? The answer is that on these operating +systems, text and binary streams use different file formats, and the +only way to read or write ``an ordinary file of text'' that can work +with other text-oriented programs is through a text stream. + +In @theglibc{}, and on all POSIX systems, there is no difference +between text streams and binary streams. When you open a stream, you +get the same kind of stream regardless of whether you ask for binary. 
+This stream can handle any file content, and has none of the +restrictions that text streams sometimes have. + +@node File Positioning +@section File Positioning +@cindex file positioning on a stream +@cindex positioning a stream +@cindex seeking on a stream + +The @dfn{file position} of a stream describes where in the file the +stream is currently reading or writing. I/O on the stream advances the +file position through the file. On @gnusystems{}, the file position is +represented as an integer, which counts the number of bytes from the +beginning of the file. @xref{File Position}. + +During I/O to an ordinary disk file, you can change the file position +whenever you wish, so as to read or write any portion of the file. Some +other kinds of files may also permit this. Files which support changing +the file position are sometimes referred to as @dfn{random-access} +files. + +You can use the functions in this section to examine or modify the file +position indicator associated with a stream. The symbols listed below +are declared in the header file @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun {long int} ftell (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function returns the current file position of the stream +@var{stream}. + +This function can fail if the stream doesn't support file positioning, +or if the file position can't be represented in a @code{long int}, and +possibly for other reasons as well. If a failure occurs, a value of +@code{-1} is returned. +@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun off_t ftello (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{ftello} function is similar to @code{ftell}, except that it +returns a value of type @code{off_t}. 
Systems which support this type +use it to describe all file positions, unlike the POSIX specification +which uses a @code{long int}. The two are not necessarily the same size. +Therefore, using @code{ftell} can lead to problems if the implementation is +written on top of a POSIX compliant low-level I/O implementation, and using +@code{ftello} is preferable whenever it is available. + +If this function fails it returns @code{(off_t) -1}. This can happen due +to missing support for file positioning or internal errors. Otherwise +the return value is the current file position. + +The function is an extension defined in the Single Unix Specification +version 2. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit system this function is in fact @code{ftello64}. I.e., the +LFS interface transparently replaces the old interface. +@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun off64_t ftello64 (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function is similar to @code{ftello} with the only difference that +the return value is of type @code{off64_t}. This also requires that the +stream @var{stream} was opened using either @code{fopen64}, +@code{freopen64}, or @code{tmpfile64} since otherwise the underlying +file operations to position the file pointer beyond the @twoexp{31} +bytes limit might fail. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a 32 +bit machine this function is available under the name @code{ftello} +and so transparently replaces the old interface. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int fseek (FILE *@var{stream}, long int @var{offset}, int @var{whence}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{fseek} function is used to change the file position of the +stream @var{stream}. 
The value of @var{whence} must be one of the +constants @code{SEEK_SET}, @code{SEEK_CUR}, or @code{SEEK_END}, to +indicate whether the @var{offset} is relative to the beginning of the +file, the current file position, or the end of the file, respectively. + +This function returns a value of zero if the operation was successful, +and a nonzero value to indicate failure. A successful call also clears +the end-of-file indicator of @var{stream} and discards any characters +that were ``pushed back'' by the use of @code{ungetc}. + +@code{fseek} either flushes any buffered output before setting the file +position or else remembers it so it will be written later in its proper +place in the file. +@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun int fseeko (FILE *@var{stream}, off_t @var{offset}, int @var{whence}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function is similar to @code{fseek} but it corrects a problem with +@code{fseek} in a system with POSIX types. Using a value of type +@code{long int} for the offset is not compatible with POSIX. +@code{fseeko} uses the correct type @code{off_t} for the @var{offset} +parameter. + +For this reason it is a good idea to prefer @code{fseeko} whenever it is +available since its functionality is (if different at all) closer to the +underlying definition. + +The functionality and return value are the same as for @code{fseek}. + +The function is an extension defined in the Single Unix Specification +version 2. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit system this function is in fact @code{fseeko64}. I.e., the +LFS interface transparently replaces the old interface. 
+@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun int fseeko64 (FILE *@var{stream}, off64_t @var{offset}, int @var{whence}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function is similar to @code{fseeko} with the only difference that +the @var{offset} parameter is of type @code{off64_t}. This also +requires that the stream @var{stream} was opened using either +@code{fopen64}, @code{freopen64}, or @code{tmpfile64} since otherwise +the underlying file operations to position the file pointer beyond the +@twoexp{31} bytes limit might fail. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a 32 +bits machine this function is available under the name @code{fseeko} +and so transparently replaces the old interface. +@end deftypefun + +@strong{Portability Note:} In non-POSIX systems, @code{ftell}, +@code{ftello}, @code{fseek} and @code{fseeko} might work reliably only +on binary streams. @xref{Binary Streams}. + +The following symbolic constants are defined for use as the @var{whence} +argument to @code{fseek}. They are also used with the @code{lseek} +function (@pxref{I/O Primitives}) and to specify offsets for file locks +(@pxref{Control Operations}). + +@comment stdio.h +@comment ISO +@deftypevr Macro int SEEK_SET +This is an integer constant which, when used as the @var{whence} +argument to the @code{fseek} or @code{fseeko} functions, specifies that +the offset provided is relative to the beginning of the file. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int SEEK_CUR +This is an integer constant which, when used as the @var{whence} +argument to the @code{fseek} or @code{fseeko} functions, specifies that +the offset provided is relative to the current file position. 
+@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int SEEK_END +This is an integer constant which, when used as the @var{whence} +argument to the @code{fseek} or @code{fseeko} functions, specifies that +the offset provided is relative to the end of the file. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypefun void rewind (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{rewind} function positions the stream @var{stream} at the +beginning of the file. It is equivalent to calling @code{fseek} or +@code{fseeko} on the @var{stream} with an @var{offset} argument of +@code{0L} and a @var{whence} argument of @code{SEEK_SET}, except that +the return value is discarded and the error indicator for the stream is +reset. +@end deftypefun + +These three aliases for the @samp{SEEK_@dots{}} constants exist for the +sake of compatibility with older BSD systems. They are defined in two +different header files: @file{fcntl.h} and @file{sys/file.h}. + +@vtable @code +@comment sys/file.h +@comment BSD +@item L_SET +An alias for @code{SEEK_SET}. + +@comment sys/file.h +@comment BSD +@item L_INCR +An alias for @code{SEEK_CUR}. + +@comment sys/file.h +@comment BSD +@item L_XTND +An alias for @code{SEEK_END}. +@end vtable + +@node Portable Positioning +@section Portable File-Position Functions + +On @gnusystems{}, the file position is truly a character count. You +can specify any character count value as an argument to @code{fseek} or +@code{fseeko} and get reliable results for any random access file. +However, some @w{ISO C} systems do not represent file positions in this +way. + +On some systems where text streams truly differ from binary streams, it +is impossible to represent the file position of a text stream as a count +of characters from the beginning of the file. 
For example, the file +position on some systems must encode both a record offset within the +file, and a character offset within the record. + +As a consequence, if you want your programs to be portable to these +systems, you must observe certain rules: + +@itemize @bullet +@item +The value returned from @code{ftell} on a text stream has no predictable +relationship to the number of characters you have read so far. The only +thing you can rely on is that you can use it subsequently as the +@var{offset} argument to @code{fseek} or @code{fseeko} to move back to +the same file position. + +@item +In a call to @code{fseek} or @code{fseeko} on a text stream, either the +@var{offset} must be zero, or @var{whence} must be @code{SEEK_SET} and +the @var{offset} must be the result of an earlier call to @code{ftell} +on the same stream. + +@item +The value of the file position indicator of a text stream is undefined +while there are characters that have been pushed back with @code{ungetc} +that haven't been read or discarded. @xref{Unreading}. +@end itemize + +But even if you observe these rules, you may still have trouble for long +files, because @code{ftell} and @code{fseek} use a @code{long int} value +to represent the file position. This type may not have room to encode +all the file positions in a large file. Using the @code{ftello} and +@code{fseeko} functions might help here since the @code{off_t} type is +expected to be able to hold all file position values but this still does +not help to handle additional information which must be associated with +a file position. + +So if you do want to support systems with peculiar encodings for the +file positions, it is better to use the functions @code{fgetpos} and +@code{fsetpos} instead. These functions represent the file position +using the data type @code{fpos_t}, whose internal representation varies +from system to system. + +These symbols are declared in the header file @file{stdio.h}. 
+@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftp {Data Type} fpos_t +This is the type of an object that can encode information about the +file position of a stream, for use by the functions @code{fgetpos} and +@code{fsetpos}. + +In @theglibc{}, @code{fpos_t} is an opaque data structure that +contains internal data to represent file offset and conversion state +information. In other systems, it might have a different internal +representation. + +When compiling with @code{_FILE_OFFSET_BITS == 64} on a 32 bit machine +this type is in fact equivalent to @code{fpos64_t} since the LFS +interface transparently replaces the old interface. +@end deftp + +@comment stdio.h +@comment Unix98 +@deftp {Data Type} fpos64_t +This is the type of an object that can encode information about the +file position of a stream, for use by the functions @code{fgetpos64} and +@code{fsetpos64}. + +In @theglibc{}, @code{fpos64_t} is an opaque data structure that +contains internal data to represent file offset and conversion state +information. In other systems, it might have a different internal +representation. +@end deftp + +@comment stdio.h +@comment ISO +@deftypefun int fgetpos (FILE *@var{stream}, fpos_t *@var{position}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function stores the value of the file position indicator for the +stream @var{stream} in the @code{fpos_t} object pointed to by +@var{position}. If successful, @code{fgetpos} returns zero; otherwise +it returns a nonzero value and stores an implementation-defined positive +value in @code{errno}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit system the function is in fact @code{fgetpos64}. I.e., the LFS +interface transparently replaces the old interface. 
+@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun int fgetpos64 (FILE *@var{stream}, fpos64_t *@var{position}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function is similar to @code{fgetpos} but the file position is +returned in a variable of type @code{fpos64_t} to which @var{position} +points. + +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a 32 +bits machine this function is available under the name @code{fgetpos} +and so transparently replaces the old interface. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypefun int fsetpos (FILE *@var{stream}, const fpos_t *@var{position}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function sets the file position indicator for the stream @var{stream} +to the position @var{position}, which must have been set by a previous +call to @code{fgetpos} on the same stream. If successful, @code{fsetpos} +clears the end-of-file indicator on the stream, discards any characters +that were ``pushed back'' by the use of @code{ungetc}, and returns a value +of zero. Otherwise, @code{fsetpos} returns a nonzero value and stores +an implementation-defined positive value in @code{errno}. + +When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a +32 bit system the function is in fact @code{fsetpos64}. I.e., the LFS +interface transparently replaces the old interface. +@end deftypefun + +@comment stdio.h +@comment Unix98 +@deftypefun int fsetpos64 (FILE *@var{stream}, const fpos64_t *@var{position}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function is similar to @code{fsetpos} but the file position used +for positioning is provided in a variable of type @code{fpos64_t} to +which @var{position} points. 
+ +If the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a 32 +bits machine this function is available under the name @code{fsetpos} +and so transparently replaces the old interface. +@end deftypefun + +@node Stream Buffering +@section Stream Buffering + +@cindex buffering of streams +Characters that are written to a stream are normally accumulated and +transmitted asynchronously to the file in a block, instead of appearing +as soon as they are output by the application program. Similarly, +streams often retrieve input from the host environment in blocks rather +than on a character-by-character basis. This is called @dfn{buffering}. + +If you are writing programs that do interactive input and output using +streams, you need to understand how buffering works when you design the +user interface to your program. Otherwise, you might find that output +(such as progress or prompt messages) doesn't appear when you intended +it to, or displays some other unexpected behavior. + +This section deals only with controlling when characters are transmitted +between the stream and the file or device, and @emph{not} with how +things like echoing, flow control, and the like are handled on specific +classes of devices. For information on common control operations on +terminal devices, see @ref{Low-Level Terminal Interface}. + +You can bypass the stream buffering facilities altogether by using the +low-level input and output functions that operate on file descriptors +instead. @xref{Low-Level I/O}. + +@menu +* Buffering Concepts:: Terminology is defined here. +* Flushing Buffers:: How to ensure that output buffers are flushed. +* Controlling Buffering:: How to specify what kind of buffering to use. 
+@end menu + +@node Buffering Concepts +@subsection Buffering Concepts + +There are three different kinds of buffering strategies: + +@itemize @bullet +@item +Characters written to or read from an @dfn{unbuffered} stream are +transmitted individually to or from the file as soon as possible. +@cindex unbuffered stream + +@item +Characters written to a @dfn{line buffered} stream are transmitted to +the file in blocks when a newline character is encountered. +@cindex line buffered stream + +@item +Characters written to or read from a @dfn{fully buffered} stream are +transmitted to or from the file in blocks of arbitrary size. +@cindex fully buffered stream +@end itemize + +Newly opened streams are normally fully buffered, with one exception: a +stream connected to an interactive device such as a terminal is +initially line buffered. @xref{Controlling Buffering}, for information +on how to select a different kind of buffering. Usually the automatic +selection gives you the most convenient kind of buffering for the file +or device you open. + +The use of line buffering for interactive devices implies that output +messages ending in a newline will appear immediately---which is usually +what you want. Output that doesn't end in a newline might or might not +show up immediately, so if you want it to appear immediately, you +should flush buffered output explicitly with @code{fflush}, as described +in @ref{Flushing Buffers}. + +@node Flushing Buffers +@subsection Flushing Buffers + +@cindex flushing a stream +@dfn{Flushing} output on a buffered stream means transmitting all +accumulated characters to the file. There are many circumstances when +buffered output on a stream is flushed automatically: + +@itemize @bullet +@item +When you try to do output and the output buffer is full. + +@item +When the stream is closed. @xref{Closing Streams}. + +@item +When the program terminates by calling @code{exit}. +@xref{Normal Termination}. 
+ +@item +When a newline is written, if the stream is line buffered. + +@item +Whenever an input operation on @emph{any} stream actually reads data +from its file. +@end itemize + +If you want to flush the buffered output at another time, call +@code{fflush}, which is declared in the header file @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun int fflush (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function causes any buffered output on @var{stream} to be delivered +to the file. If @var{stream} is a null pointer, then +@code{fflush} causes buffered output on @emph{all} open output streams +to be flushed. + +This function returns @code{EOF} if a write error occurs, or zero +otherwise. +@end deftypefun + +@comment stdio.h +@comment POSIX +@deftypefun int fflush_unlocked (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{fflush_unlocked} function is equivalent to the @code{fflush} +function except that it does not implicitly lock the stream. +@end deftypefun + +The @code{fflush} function can be used to flush all streams currently +opened. While this is useful in some situations, it often does more than +necessary: typically it is called because terminal input is +required and the program wants to be sure that all output is visible on +the terminal. For that purpose only line buffered streams have to be +flushed. Solaris introduced a function especially for this. It was +always available in @theglibc{} in some form but never officially +exported. + +@comment stdio_ext.h +@comment GNU +@deftypefun void _flushlbf (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +The @code{_flushlbf} function flushes all line buffered streams +currently opened. + +This function is declared in the @file{stdio_ext.h} header. 
+@end deftypefun + +@strong{Compatibility Note:} Some brain-damaged operating systems have +been known to be so thoroughly fixated on line-oriented input and output +that flushing a line buffered stream causes a newline to be written! +Fortunately, this ``feature'' seems to be becoming less common. You do +not need to worry about this with @theglibc{}. + +In some situations it might be useful to not flush the output pending +for a stream but instead simply forget it. If transmission is costly +and the output is not needed anymore this is valid reasoning. In this +situation a non-standard function introduced in Solaris and available in +@theglibc{} can be used. + +@comment stdio_ext.h +@comment GNU +@deftypefun void __fpurge (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +The @code{__fpurge} function causes the buffer of the stream +@var{stream} to be emptied. If the stream is currently in read mode all +input in the buffer is lost. If the stream is in output mode the +buffered output is not written to the device (or whatever other +underlying storage) and the buffer is cleared. + +This function is declared in @file{stdio_ext.h}. +@end deftypefun + +@node Controlling Buffering +@subsection Controlling Which Kind of Buffering + +After opening a stream (but before any other operations have been +performed on it), you can explicitly specify what kind of buffering you +want it to have using the @code{setvbuf} function. +@cindex buffering, controlling + +The facilities listed in this section are declared in the header +file @file{stdio.h}. 
+@pindex stdio.h + +@comment stdio.h +@comment ISO +@deftypefun int setvbuf (FILE *@var{stream}, char *@var{buf}, int @var{mode}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function is used to specify that the stream @var{stream} should +have the buffering mode @var{mode}, which can be either @code{_IOFBF} +(for full buffering), @code{_IOLBF} (for line buffering), or +@code{_IONBF} (for unbuffered input/output). + +If you specify a null pointer as the @var{buf} argument, then @code{setvbuf} +allocates a buffer itself using @code{malloc}. This buffer will be freed +when you close the stream. + +Otherwise, @var{buf} should be a character array that can hold at least +@var{size} characters. You should not free the space for this array as +long as the stream remains open and this array remains its buffer. You +should usually either allocate it statically, or @code{malloc} +(@pxref{Unconstrained Allocation}) the buffer. Using an automatic array +is not a good idea unless you close the file before exiting the block +that declares the array. + +While the array remains a stream buffer, the stream I/O functions will +use the buffer for their internal purposes. You shouldn't try to access +the values in the array directly while the stream is using it for +buffering. + +The @code{setvbuf} function returns zero on success, or a nonzero value +if the value of @var{mode} is not valid or if the request could not +be honored. +@end deftypefun + +@comment stdio.h +@comment ISO +@deftypevr Macro int _IOFBF +The value of this macro is an integer constant expression that can be +used as the @var{mode} argument to the @code{setvbuf} function to +specify that the stream should be fully buffered. 
+@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int _IOLBF +The value of this macro is an integer constant expression that can be +used as the @var{mode} argument to the @code{setvbuf} function to +specify that the stream should be line buffered. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int _IONBF +The value of this macro is an integer constant expression that can be +used as the @var{mode} argument to the @code{setvbuf} function to +specify that the stream should be unbuffered. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypevr Macro int BUFSIZ +The value of this macro is an integer constant expression that is good +to use for the @var{size} argument to @code{setvbuf}. This value is +guaranteed to be at least @code{256}. + +The value of @code{BUFSIZ} is chosen on each system so as to make stream +I/O efficient. So it is a good idea to use @code{BUFSIZ} as the size +for the buffer when you call @code{setvbuf}. + +Actually, you can get an even better value to use for the buffer size +by means of the @code{fstat} system call: it is found in the +@code{st_blksize} field of the file attributes. @xref{Attribute Meanings}. + +Sometimes people also use @code{BUFSIZ} as the allocation size of +buffers used for related purposes, such as strings used to receive a +line of input with @code{fgets} (@pxref{Character Input}). There is no +particular reason to use @code{BUFSIZ} for this instead of any other +integer, except that it might lead to doing I/O in chunks of an +efficient size. +@end deftypevr + +@comment stdio.h +@comment ISO +@deftypefun void setbuf (FILE *@var{stream}, char *@var{buf}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +If @var{buf} is a null pointer, the effect of this function is +equivalent to calling @code{setvbuf} with a @var{mode} argument of +@code{_IONBF}. 
Otherwise, it is equivalent to calling @code{setvbuf} +with @var{buf}, and a @var{mode} of @code{_IOFBF} and a @var{size} +argument of @code{BUFSIZ}. + +The @code{setbuf} function is provided for compatibility with old code; +use @code{setvbuf} in all new programs. +@end deftypefun + +@comment stdio.h +@comment BSD +@deftypefun void setbuffer (FILE *@var{stream}, char *@var{buf}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +If @var{buf} is a null pointer, this function makes @var{stream} unbuffered. +Otherwise, it makes @var{stream} fully buffered using @var{buf} as the +buffer. The @var{size} argument specifies the length of @var{buf}. + +This function is provided for compatibility with old BSD code. Use +@code{setvbuf} instead. +@end deftypefun + +@comment stdio.h +@comment BSD +@deftypefun void setlinebuf (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +This function makes @var{stream} be line buffered, and allocates the +buffer for you. + +This function is provided for compatibility with old BSD code. Use +@code{setvbuf} instead. +@end deftypefun + +It is possible to query whether a given stream is line buffered or not +using a non-standard function introduced in Solaris and available in +@theglibc{}. + +@comment stdio_ext.h +@comment GNU +@deftypefun int __flbf (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{__flbf} function will return a nonzero value in case the +stream @var{stream} is line buffered. Otherwise the return value is +zero. + +This function is declared in the @file{stdio_ext.h} header. +@end deftypefun + +Two more extensions make it possible to determine the size of the buffer and how +much of it is used. These functions were also introduced in Solaris. 
+ +@comment stdio_ext.h +@comment GNU +@deftypefun size_t __fbufsize (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acsafe{}} +The @code{__fbufsize} function returns the size of the buffer in the +stream @var{stream}. This value can be used to optimize the use of the +stream. + +This function is declared in the @file{stdio_ext.h} header. +@end deftypefun + +@comment stdio_ext.h +@comment GNU +@deftypefun size_t __fpending (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream}}@asunsafe{@asucorrupt{}}@acsafe{}} +The @code{__fpending} +function returns the number of bytes currently in the output buffer. +For wide-oriented streams the measuring unit is wide characters. This +function should not be used on buffers in read mode or opened read-only. + +This function is declared in the @file{stdio_ext.h} header. +@end deftypefun + +@node Other Kinds of Streams +@section Other Kinds of Streams + +@Theglibc{} provides ways for you to define additional kinds of +streams that do not necessarily correspond to an open file. + +One such type of stream takes input from or writes output to a string. +These kinds of streams are used internally to implement the +@code{sprintf} and @code{sscanf} functions. You can also create such a +stream explicitly, using the functions described in @ref{String Streams}. + +More generally, you can define streams that do input/output to arbitrary +objects using functions supplied by your program. This protocol is +discussed in @ref{Custom Streams}. + +@strong{Portability Note:} The facilities described in this section are +specific to GNU. Other systems or C implementations might or might not +provide equivalent functionality. + +@menu +* String Streams:: Streams that get data from or put data in + a string or memory buffer. +* Custom Streams:: Defining your own streams with an arbitrary + input data source and/or output data sink. 
+@end menu + +@node String Streams +@subsection String Streams + +@cindex stream, for I/O to a string +@cindex string stream +The @code{fmemopen} and @code{open_memstream} functions allow you to do +I/O to a string or memory buffer. These facilities are declared in +@file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment GNU +@deftypefun {FILE *} fmemopen (void *@var{buf}, size_t @var{size}, const char *@var{opentype}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @aculock{}}} +@c Unlike open_memstream, fmemopen does (indirectly) call _IO_link_in, +@c bringing with it additional potential for async trouble with +@c list_all_lock. +This function opens a stream that allows the access specified by the +@var{opentype} argument, that reads from or writes to the buffer specified +by the argument @var{buf}. This array must be at least @var{size} bytes long. + +If you specify a null pointer as the @var{buf} argument, @code{fmemopen} +dynamically allocates an array @var{size} bytes long (as with @code{malloc}; +@pxref{Unconstrained Allocation}). This is really only useful +if you are going to write things to the buffer and then read them back +in again, because you have no way of actually getting a pointer to the +buffer (for this, try @code{open_memstream}, below). The buffer is +freed when the stream is closed. + +The argument @var{opentype} is the same as in @code{fopen} +(@pxref{Opening Streams}). If the @var{opentype} specifies +append mode, then the initial file position is set to the first null +character in the buffer. Otherwise the initial file position is at the +beginning of the buffer. + +When a stream open for writing is flushed or closed, a null character +(zero byte) is written at the end of the buffer if it fits. You +should add an extra byte to the @var{size} argument to account for this. +Attempts to write more than @var{size} bytes to the buffer result +in an error. 
+ +For a stream open for reading, null characters (zero bytes) in the +buffer do not count as ``end of file''. Read operations indicate end of +file only when the file position advances past @var{size} bytes. So, if +you want to read characters from a null-terminated string, you should +supply the length of the string as the @var{size} argument. +@end deftypefun + +Here is an example of using @code{fmemopen} to create a stream for +reading from a string: + +@smallexample +@include memopen.c.texi +@end smallexample + +This program produces the following output: + +@smallexample +Got f +Got o +Got o +Got b +Got a +Got r +@end smallexample + +@comment stdio.h +@comment GNU +@deftypefun {FILE *} open_memstream (char **@var{ptr}, size_t *@var{sizeloc}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function opens a stream for writing to a buffer. The buffer is +allocated dynamically and grown as necessary, using @code{malloc}. +After you've closed the stream, this buffer is your responsibility to +clean up using @code{free} or @code{realloc}. @xref{Unconstrained Allocation}. + +When the stream is closed with @code{fclose} or flushed with +@code{fflush}, the locations @var{ptr} and @var{sizeloc} are updated to +contain the pointer to the buffer and its size. The values thus stored +remain valid only as long as no further output on the stream takes +place. If you do more output, you must flush the stream again to store +new values before you use them again. + +A null character is written at the end of the buffer. This null character +is @emph{not} included in the size value stored at @var{sizeloc}. + +You can move the stream's file position with @code{fseek} or +@code{fseeko} (@pxref{File Positioning}). Moving the file position past +the end of the data already written fills the intervening space with +zeroes. 
+@end deftypefun + +Here is an example of using @code{open_memstream}: + +@smallexample +@include memstrm.c.texi +@end smallexample + +This program produces the following output: + +@smallexample +buf = `hello', size = 5 +buf = `hello, world', size = 12 +@end smallexample + +@node Custom Streams +@subsection Programming Your Own Custom Streams +@cindex custom streams +@cindex programming your own streams + +This section describes how you can make a stream that gets input from an +arbitrary data source or writes output to an arbitrary data sink +programmed by you. We call these @dfn{custom streams}. The functions +and types described here are all GNU extensions. + +@c !!! this does not talk at all about the higher-level hooks + +@menu +* Streams and Cookies:: The @dfn{cookie} records where to fetch or + store data that is read or written. +* Hook Functions:: How you should define the four @dfn{hook + functions} that a custom stream needs. +@end menu + +@node Streams and Cookies +@subsubsection Custom Streams and Cookies +@cindex cookie, for custom stream + +Inside every custom stream is a special object called the @dfn{cookie}. +This is an object supplied by you which records where to fetch or store +the data read or written. It is up to you to define a data type to use +for the cookie. The stream functions in the library never refer +directly to its contents, and they don't even know what the type is; +they record its address with type @code{void *}. + +To implement a custom stream, you must specify @emph{how} to fetch or +store the data in the specified place. You do this by defining +@dfn{hook functions} to read, write, change ``file position'', and close +the stream. All four of these functions will be passed the stream's +cookie so they can tell where to fetch or store the data. The library +functions don't know what's inside the cookie, but your functions will +know. 
+ +When you create a custom stream, you must specify the cookie pointer, +and also the four hook functions stored in a structure of type +@code{cookie_io_functions_t}. + +These facilities are declared in @file{stdio.h}. +@pindex stdio.h + +@comment stdio.h +@comment GNU +@deftp {Data Type} {cookie_io_functions_t} +This is a structure type that holds the functions that define the +communications protocol between the stream and its cookie. It has +the following members: + +@table @code +@item cookie_read_function_t *read +This is the function that reads data from the cookie. If the value is a +null pointer instead of a function, then read operations on this stream +always return @code{EOF}. + +@item cookie_write_function_t *write +This is the function that writes data to the cookie. If the value is a +null pointer instead of a function, then data written to the stream is +discarded. + +@item cookie_seek_function_t *seek +This is the function that performs the equivalent of file positioning on +the cookie. If the value is a null pointer instead of a function, calls +to @code{fseek} or @code{fseeko} on this stream can only seek to +locations within the buffer; any attempt to seek outside the buffer will +return an @code{ESPIPE} error. + +@item cookie_close_function_t *close +This function performs any appropriate cleanup on the cookie when +closing the stream. If the value is a null pointer instead of a +function, nothing special is done to close the cookie when the stream is +closed. +@end table +@end deftp + +@comment stdio.h +@comment GNU +@deftypefun {FILE *} fopencookie (void *@var{cookie}, const char *@var{opentype}, cookie_io_functions_t @var{io-functions}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @aculock{}}} +This function actually creates the stream for communicating with the +@var{cookie} using the functions in the @var{io-functions} argument. 
+The @var{opentype} argument is interpreted as for @code{fopen}; +see @ref{Opening Streams}. (But note that the ``truncate on +open'' option is ignored.) The new stream is fully buffered. + +The @code{fopencookie} function returns the newly created stream, or a null +pointer in case of an error. +@end deftypefun + +@node Hook Functions +@subsubsection Custom Stream Hook Functions +@cindex hook functions (of custom streams) + +Here are more details on how you should define the four hook functions +that a custom stream needs. + +You should define the function to read data from the cookie as: + +@smallexample +ssize_t @var{reader} (void *@var{cookie}, char *@var{buffer}, size_t @var{size}) +@end smallexample + +This is very similar to the @code{read} function; see @ref{I/O +Primitives}. Your function should transfer up to @var{size} bytes into +the @var{buffer}, and return the number of bytes read, or zero to +indicate end-of-file. You can return a value of @code{-1} to indicate +an error. + +You should define the function to write data to the cookie as: + +@smallexample +ssize_t @var{writer} (void *@var{cookie}, const char *@var{buffer}, size_t @var{size}) +@end smallexample + +This is very similar to the @code{write} function; see @ref{I/O +Primitives}. Your function should transfer up to @var{size} bytes from +the buffer, and return the number of bytes written. You can return a +value of @code{0} to indicate an error. You must not return any +negative value. + +You should define the function to perform seek operations on the cookie +as: + +@smallexample +int @var{seeker} (void *@var{cookie}, off64_t *@var{position}, int @var{whence}) +@end smallexample + +For this function, the @var{position} and @var{whence} arguments are +interpreted as for @code{fgetpos}; see @ref{Portable Positioning}. + +After doing the seek operation, your function should store the resulting +file position relative to the beginning of the file in @var{position}. 
+Your function should return a value of @code{0} on success and @code{-1} +to indicate an error. + +You should define the function to do cleanup operations on the cookie +appropriate for closing the stream as: + +@smallexample +int @var{cleaner} (void *@var{cookie}) +@end smallexample + +Your function should return @code{-1} to indicate an error, and @code{0} +otherwise. + +@comment stdio.h +@comment GNU +@deftp {Data Type} cookie_read_function_t +This is the data type that the read function for a custom stream should have. +If you declare the function as shown above, this is the type it will have. +@end deftp + +@comment stdio.h +@comment GNU +@deftp {Data Type} cookie_write_function_t +The data type of the write function for a custom stream. +@end deftp + +@comment stdio.h +@comment GNU +@deftp {Data Type} cookie_seek_function_t +The data type of the seek function for a custom stream. +@end deftp + +@comment stdio.h +@comment GNU +@deftp {Data Type} cookie_close_function_t +The data type of the close function for a custom stream. +@end deftp + +@ignore +Roland says: + +@quotation +There is another set of functions one can give a stream, the +input-room and output-room functions. These functions must +understand stdio internals. To describe how to use these +functions, you also need to document lots of how stdio works +internally (which isn't relevant for other uses of stdio). +Perhaps I can write an interface spec from which you can write +good documentation. But it's pretty complex and deals with lots +of nitty-gritty details. I think it might be better to let this +wait until the rest of the manual is more done and polished. +@end quotation +@end ignore + +@c ??? This section could use an example. + + +@node Formatted Messages +@section Formatted Messages +@cindex formatted messages + +On systems which are based on System V messages of programs (especially +the system tools) are printed in a strict form using the @code{fmtmsg} +function. 
The uniformity sometimes helps the user to interpret messages +and the strictness tests of the @code{fmtmsg} function ensure that the +programmer follows some minimal requirements. + +@menu +* Printing Formatted Messages:: The @code{fmtmsg} function. +* Adding Severity Classes:: Add more severity classes. +* Example:: How to use @code{fmtmsg} and @code{addseverity}. +@end menu + + +@node Printing Formatted Messages +@subsection Printing Formatted Messages + +Messages can be printed to standard error and/or to the console. To +select the destination the programmer can use the following two values, +bitwise OR combined if wanted, for the @var{classification} parameter of +@code{fmtmsg}: + +@vtable @code +@item MM_PRINT +Display the message in standard error. +@item MM_CONSOLE +Display the message on the system console. +@end vtable + +The erroneous piece of the system can be signalled by exactly one of the +following values which also is bitwise ORed with the +@var{classification} parameter to @code{fmtmsg}: + +@vtable @code +@item MM_HARD +The source of the condition is some hardware. +@item MM_SOFT +The source of the condition is some software. +@item MM_FIRM +The source of the condition is some firmware. +@end vtable + +A third component of the @var{classification} parameter to @code{fmtmsg} +can describe the part of the system which detects the problem. This is +done by using exactly one of the following values: + +@vtable @code +@item MM_APPL +The erroneous condition is detected by the application. +@item MM_UTIL +The erroneous condition is detected by a utility. +@item MM_OPSYS +The erroneous condition is detected by the operating system. +@end vtable + +A last component of @var{classification} can signal the results of this +message. Exactly one of the following values can be used: + +@vtable @code +@item MM_RECOVER +It is a recoverable error. +@item MM_NRECOV +It is a non-recoverable error. 
+@end vtable + +@comment fmtmsg.h +@comment XPG +@deftypefun int fmtmsg (long int @var{classification}, const char *@var{label}, int @var{severity}, const char *@var{text}, const char *@var{action}, const char *@var{tag}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acsafe{}} +Display a message described by its parameters on the device(s) specified +in the @var{classification} parameter. The @var{label} parameter +identifies the source of the message. The string should consist of two +colon separated parts where the first part has not more than 10 and the +second part not more than 14 characters. The @var{text} parameter +describes the condition of the error, the @var{action} parameter possible +steps to recover from the error and the @var{tag} parameter is a +reference to the online documentation where more information can be +found. It should contain the @var{label} value and a unique +identification number. + +Each of the parameters can be a special value which means this value +is to be omitted. The symbolic names for these values are: + +@vtable @code +@item MM_NULLLBL +Ignore @var{label} parameter. +@item MM_NULLSEV +Ignore @var{severity} parameter. +@item MM_NULLMC +Ignore @var{classification} parameter. This implies that nothing is +actually printed. +@item MM_NULLTXT +Ignore @var{text} parameter. +@item MM_NULLACT +Ignore @var{action} parameter. +@item MM_NULLTAG +Ignore @var{tag} parameter. +@end vtable + +There is another way certain fields can be omitted from the output to +standard error. This is described below in the description of +environment variables influencing the behavior. + +The @var{severity} parameter can have one of the values in the following +table: +@cindex severity class + +@vtable @code +@item MM_NOSEV +Nothing is printed, this value is the same as @code{MM_NULLSEV}. +@item MM_HALT +This value is printed as @code{HALT}. +@item MM_ERROR +This value is printed as @code{ERROR}. 
+@item MM_WARNING +This value is printed as @code{WARNING}. +@item MM_INFO +This value is printed as @code{INFO}. +@end vtable + +The numeric values of these five macros are between @code{0} and +@code{4}. Using the environment variable @code{SEV_LEVEL} or using the +@code{addseverity} function one can add more severity levels with their +corresponding string to print. This is described below +(@pxref{Adding Severity Classes}). + +@noindent +If no parameter is ignored the output looks like this: + +@smallexample +@var{label}: @var{severity-string}: @var{text} +TO FIX: @var{action} @var{tag} +@end smallexample + +The colons, new line characters and the @code{TO FIX} string are +inserted if necessary, i.e., if the corresponding parameter is not +ignored. + +This function is specified in the X/Open Portability Guide. It is also +available on all systems derived from System V. + +The function returns the value @code{MM_OK} if no error occurred. If +only the printing to standard error failed, it returns @code{MM_NOMSG}. +If printing to the console fails, it returns @code{MM_NOCON}. If +nothing is printed @code{MM_NOTOK} is returned. Among situations where +all outputs fail this last value is also returned if a parameter value +is incorrect. +@end deftypefun + +There are two environment variables which influence the behavior of +@code{fmtmsg}. The first is @code{MSGVERB}. It is used to control the +output actually happening on standard error (@emph{not} the console +output). Each of the five fields can explicitly be enabled. To do +this the user has to put the @code{MSGVERB} variable with a format like +the following in the environment before calling the @code{fmtmsg} function +the first time: + +@smallexample +MSGVERB=@var{keyword}[:@var{keyword}[:@dots{}]] +@end smallexample + +Valid @var{keyword}s are @code{label}, @code{severity}, @code{text}, +@code{action}, and @code{tag}. 
If the environment variable is not set +or is the empty string, or if an unsupported keyword is given or the value is +otherwise invalid, no part of the message is masked out. + +The second environment variable which influences the behavior of +@code{fmtmsg} is @code{SEV_LEVEL}. This variable and the change in the +behavior of @code{fmtmsg} is not specified in the X/Open Portability +Guide. It is available in System V systems, though. It can be used to +introduce new severity levels. By default, only the five severity levels +described above are available. Any other numeric value would make +@code{fmtmsg} print nothing. + +If the user puts @code{SEV_LEVEL} with a format like + +@smallexample +SEV_LEVEL=[@var{description}[:@var{description}[:@dots{}]]] +@end smallexample + +@noindent +in the environment of the process before the first call to +@code{fmtmsg}, where @var{description} has a value of the form + +@smallexample +@var{severity-keyword},@var{level},@var{printstring} +@end smallexample + +The @var{severity-keyword} part is not used by @code{fmtmsg} but it has +to be present. The @var{level} part is a string representation of a +number. The numeric value must be a number greater than 4. This value +must be used in the @var{severity} parameter of @code{fmtmsg} to select +this class. It is not possible to overwrite any of the predefined +classes. The @var{printstring} is the string printed when a message of +this class is processed by @code{fmtmsg} (see above, @code{fmtmsg} does +not print the numeric value but instead the string representation). + + +@node Adding Severity Classes +@subsection Adding Severity Classes +@cindex severity class + +There is another possibility to introduce severity classes besides using +the environment variable @code{SEV_LEVEL}. This simplifies the task of +introducing new classes in a running program. One could use the +@code{setenv} or @code{putenv} function to set the environment variable, +but this is toilsome. 
+ +@deftypefun int addseverity (int @var{severity}, const char *@var{string}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}} +This function allows the introduction of new severity classes which can be +addressed by the @var{severity} parameter of the @code{fmtmsg} function. +The @var{severity} parameter of @code{addseverity} must match the value +for the parameter with the same name of @code{fmtmsg}, and @var{string} +is the string printed in the actual messages instead of the numeric +value. + +If @var{string} is @code{NULL} the severity class with the numeric value +according to @var{severity} is removed. + +It is not possible to overwrite or remove one of the default severity +classes. All calls to @code{addseverity} with @var{severity} set to one +of the values for the default classes will fail. + +The return value is @code{MM_OK} if the task was successfully performed. +If the return value is @code{MM_NOTOK} something went wrong. This could +mean that no more memory is available or a class is not available when +it has to be removed. + +This function is not specified in the X/Open Portability Guide although +the @code{fmtmsg} function is. It is available on System V systems. +@end deftypefun + + +@node Example +@subsection How to use @code{fmtmsg} and @code{addseverity} + +Here is a simple example program to illustrate the use of both +functions described in this section. + +@smallexample +@include fmtmsgexpl.c.texi +@end smallexample + +The second call to @code{fmtmsg} illustrates a use of this function as +it usually occurs on System V systems, which heavily use this function. +It seems worthwhile to give a short explanation here of how this system +works on System V. The value of the +@var{label} field (@code{UX:cat}) says that the error occurred in the +Unix program @code{cat}. The explanation of the error follows and the +value for the @var{action} parameter is @code{"refer to manual"}. 
One +could be more specific here, if necessary. The @var{tag} field contains, +as proposed above, the value of the string given for the @var{label} +parameter, and additionally a unique ID (@code{001} in this case). For +a GNU environment this string could contain a reference to the +corresponding node in the Info page for the program. + +@noindent +Running this program without specifying the @code{MSGVERB} and +@code{SEV_LEVEL} environment variables produces the following output: + +@smallexample +UX:cat: NOTE2: invalid syntax +TO FIX: refer to manual UX:cat:001 +@end smallexample + +We see the different fields of the message and how the extra glue (the +colons and the @code{TO FIX} string) is printed. But only one of the +three calls to @code{fmtmsg} produced output. The first call does not +print anything because the @var{label} parameter is not in the correct +form. The string must contain two fields, separated by a colon +(@pxref{Printing Formatted Messages}). The third @code{fmtmsg} call +produced no output since the class with the numeric value @code{6} is +not defined. Although a class with numeric value @code{5} is also not +defined by default, the call to @code{addseverity} introduces it and +the second call to @code{fmtmsg} produces the above output. + +When we change the environment of the program to contain +@code{SEV_LEVEL=XXX,6,NOTE} when running it we get a different result: + +@smallexample +UX:cat: NOTE2: invalid syntax +TO FIX: refer to manual UX:cat:001 +label:foo: NOTE: text +TO FIX: action tag +@end smallexample + +Now the third call to @code{fmtmsg} produced some output and we see how +the string @code{NOTE} from the environment variable appears in the +message. + +Now we can reduce the output by specifying which fields we are +interested in. 
If we additionally set the environment variable +@code{MSGVERB} to the value @code{severity:label:action} we get the +following output: + +@smallexample +UX:cat: NOTE2 +TO FIX: refer to manual +label:foo: NOTE +TO FIX: action +@end smallexample + +@noindent +I.e., the output produced by the @var{text} and the @var{tag} parameters +to @code{fmtmsg} vanished. Please also note that now there is no colon +after the @code{NOTE} and @code{NOTE2} strings in the output. This is +not necessary since there is no more output on this line because the text +is missing. diff --git a/REORG.TODO/manual/string.texi b/REORG.TODO/manual/string.texi new file mode 100644 index 0000000000..b8810d66b7 --- /dev/null +++ b/REORG.TODO/manual/string.texi @@ -0,0 +1,3000 @@ +@node String and Array Utilities, Character Set Handling, Character Handling, Top +@c %MENU% Utilities for copying and comparing strings and arrays +@chapter String and Array Utilities + +Operations on strings (null-terminated byte sequences) are an important part of +many programs. @Theglibc{} provides an extensive set of string +utility functions, including functions for copying, concatenating, +comparing, and searching strings. Many of these functions can also +operate on arbitrary regions of storage; for example, the @code{memcpy} +function can be used to copy the contents of any kind of array. + +It's fairly common for beginning C programmers to ``reinvent the wheel'' +by duplicating this functionality in their own code, but it pays to +become familiar with the library functions and to make use of them, +since this offers benefits in maintenance, efficiency, and portability. + +For instance, you could easily compare one string to another in two +lines of C code, but if you use the built-in @code{strcmp} function, +you're less likely to make a mistake. And, since these library +functions are typically highly optimized, your program may run faster +too. 
+ +@menu +* Representation of Strings:: Introduction to basic concepts. +* String/Array Conventions:: Whether to use a string function or an + arbitrary array function. +* String Length:: Determining the length of a string. +* Copying Strings and Arrays:: Functions to copy strings and arrays. +* Concatenating Strings:: Functions to concatenate strings while copying. +* Truncating Strings:: Functions to truncate strings while copying. +* String/Array Comparison:: Functions for byte-wise and character-wise + comparison. +* Collation Functions:: Functions for collating strings. +* Search Functions:: Searching for a specific element or substring. +* Finding Tokens in a String:: Splitting a string into tokens by looking + for delimiters. +* Erasing Sensitive Data:: Clearing memory which contains sensitive + data, after it's no longer needed. +* strfry:: Function for flash-cooking a string. +* Trivial Encryption:: Obscuring data. +* Encode Binary Data:: Encoding and Decoding of Binary Data. +* Argz and Envz Vectors:: Null-separated string vectors. +@end menu + +@node Representation of Strings +@section Representation of Strings +@cindex string, representation of + +This section is a quick summary of string concepts for beginning C +programmers. It describes how strings are represented in C +and some common pitfalls. If you are already familiar with this +material, you can skip this section. + +@cindex string +A @dfn{string} is a null-terminated array of bytes of type @code{char}, +including the terminating null byte. String-valued +variables are usually declared to be pointers of type @code{char *}. +Such variables do not include space for the text of a string; that has +to be stored somewhere else---in an array variable, a string constant, +or dynamically allocated memory (@pxref{Memory Allocation}). It's up to +you to store the address of the chosen memory space into the pointer +variable. Alternatively you can store a @dfn{null pointer} in the +pointer variable. 
The null pointer does not point anywhere, so +attempting to reference the string it points to gets an error. + +@cindex multibyte character +@cindex multibyte string +@cindex wide string +A @dfn{multibyte character} is a sequence of one or more bytes that +represents a single character using the locale's encoding scheme; a +null byte always represents the null character. A @dfn{multibyte +string} is a string that consists entirely of multibyte +characters. In contrast, a @dfn{wide string} is a null-terminated +sequence of @code{wchar_t} objects. A wide-string variable is usually +declared to be a pointer of type @code{wchar_t *}, by analogy with +string variables and @code{char *}. @xref{Extended Char Intro}. + +@cindex null byte +@cindex null wide character +By convention, the @dfn{null byte}, @code{'\0'}, +marks the end of a string and the @dfn{null wide character}, +@code{L'\0'}, marks the end of a wide string. For example, in +testing to see whether the @code{char *} variable @var{p} points to a +null byte marking the end of a string, you can write +@code{!*@var{p}} or @code{*@var{p} == '\0'}. + +A null byte is quite different conceptually from a null pointer, +although both are represented by the integer constant @code{0}. + +@cindex string literal +A @dfn{string literal} appears in C program source as a multibyte +string between double-quote characters (@samp{"}). If the +initial double-quote character is immediately preceded by a capital +@samp{L} (ell) character (as in @code{L"foo"}), it is a wide string +literal. String literals can also contribute to @dfn{string +concatenation}: @code{"a" "b"} is the same as @code{"ab"}. +For wide strings one can use either +@code{L"a" L"b"} or @code{L"a" "b"}. Modification of string literals is +not allowed by the GNU C compiler, because literals are placed in +read-only storage. + +Arrays that are declared @code{const} cannot be modified +either. 
It's generally good style to declare non-modifiable string +pointers to be of type @code{const char *}, since this often allows the +C compiler to detect accidental modifications as well as providing some +amount of documentation about what your program intends to do with the +string. + +The amount of memory allocated for a byte array may extend past the null byte +that marks the end of the string that the array contains. In this +document, the term @dfn{allocated size} is always used to refer to the +total amount of memory allocated for an array, while the term +@dfn{length} refers to the number of bytes up to (but not including) +the terminating null byte. Wide strings are similar, except their +sizes and lengths count wide characters, not bytes. +@cindex length of string +@cindex allocation size of string +@cindex size of string +@cindex string length +@cindex string allocation + +A notorious source of program bugs is trying to put more bytes into a +string than fit in its allocated size. When writing code that extends +strings or moves bytes into a pre-allocated array, you should be +very careful to keep track of the length of the text and make explicit +checks for overflowing the array. Many of the library functions +@emph{do not} do this for you! Remember also that you need to allocate +an extra byte to hold the null byte that marks the end of the +string. + +@cindex single-byte string +@cindex multibyte string +Originally strings were sequences of bytes where each byte represented a +single character. This is still true today if the strings are encoded +using a single-byte character encoding. Things are different if the +strings are encoded using a multibyte encoding (for more information on +encodings see @ref{Extended Char Intro}). There is no difference in +the programming interface for these two kinds of strings; the programmer +has to be aware of this and interpret the byte sequences accordingly. 
+ +But since there is no separate interface taking care of these +differences the byte-based string functions are sometimes hard to use. +Since the count parameters of these functions specify bytes a call to +@code{memcpy} could cut a multibyte character in the middle and put an +incomplete (and therefore unusable) byte sequence in the target buffer. + +@cindex wide string +To avoid these problems later versions of the @w{ISO C} standard +introduce a second set of functions which are operating on @dfn{wide +characters} (@pxref{Extended Char Intro}). These functions don't have +the problems the single-byte versions have since every wide character is +a legal, interpretable value. This does not mean that cutting wide +strings at arbitrary points is without problems. It normally +is for alphabet-based languages (except for non-normalized text) but +languages based on syllables still have the problem that more than one +wide character is necessary to complete a logical unit. This is a +higher level problem which the @w{C library} functions are not designed +to solve. But it is at least good that no invalid byte sequences can be +created. Also, the higher level functions can much more easily operate +on wide characters than on multibyte characters so that a common strategy +is to use wide characters internally whenever text is more than simply +copied. + +The remainder of this chapter will discuss the functions for handling +wide strings in parallel with the discussion of +strings since there is almost always an exact equivalent +available. + +@node String/Array Conventions +@section String and Array Conventions + +This chapter describes both functions that work on arbitrary arrays or +blocks of memory, and functions that are specific to strings and wide +strings. 
+ +Functions that operate on arbitrary blocks of memory have names +beginning with @samp{mem} and @samp{wmem} (such as @code{memcpy} and +@code{wmemcpy}) and invariably take an argument which specifies the size +(in bytes and wide characters respectively) of the block of memory to +operate on. The array arguments and return values for these functions +have type @code{void *} or @code{wchar_t *}. As a matter of style, the +elements of the arrays used with the @samp{mem} functions are referred +to as ``bytes''. You can pass any kind of pointer to these functions, +and the @code{sizeof} operator is useful in computing the value for the +size argument. Parameters to the @samp{wmem} functions must be of type +@code{wchar_t *}. These functions are not really usable with anything +but arrays of this type. + +In contrast, functions that operate specifically on strings and wide +strings have names beginning with @samp{str} and @samp{wcs} +respectively (such as @code{strcpy} and @code{wcscpy}) and look for a +terminating null byte or null wide character instead of requiring an explicit +size argument to be passed. (Some of these functions accept a specified +maximum length, but they also check for premature termination.) +The array arguments and return values for these +functions have type @code{char *} and @code{wchar_t *} respectively, and +the array elements are referred to as ``bytes'' and ``wide +characters''. + +In many cases, there are both @samp{mem} and @samp{str}/@samp{wcs} +versions of a function. The one that is more appropriate to use depends +on the exact situation. When your program is manipulating arbitrary +arrays or blocks of storage, then you should always use the @samp{mem} +functions. On the other hand, when you are manipulating +strings it is usually more convenient to use the @samp{str}/@samp{wcs} +functions, unless you already know the length of the string in advance. +The @samp{wmem} functions should be used for wide character arrays with +known size. 
+ +@cindex wint_t +@cindex parameter promotion +Some of the memory and string functions take single characters as +arguments. Since a value of type @code{char} is automatically promoted +into a value of type @code{int} when used as a parameter, the functions +are declared with @code{int} as the type of the parameter in question. +In case of the wide character functions the situation is similar: the +parameter type for a single wide character is @code{wint_t} and not +@code{wchar_t}. This would for many implementations not be necessary +since @code{wchar_t} is large enough to not be automatically +promoted, but since the @w{ISO C} standard does not require such a +choice of types the @code{wint_t} type is used. + +@node String Length +@section String Length + +You can get the length of a string using the @code{strlen} function. +This function is declared in the header file @file{string.h}. +@pindex string.h + +@comment string.h +@comment ISO +@deftypefun size_t strlen (const char *@var{s}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strlen} function returns the length of the +string @var{s} in bytes. (In other words, it returns the offset of the +terminating null byte within the array.) + +For example, +@smallexample +strlen ("hello, world") + @result{} 12 +@end smallexample + +When applied to an array, the @code{strlen} function returns +the length of the string stored there, not its allocated size. You can +get the allocated size of the array that holds a string using +the @code{sizeof} operator: + +@smallexample +char string[32] = "hello, world"; +sizeof (string) + @result{} 32 +strlen (string) + @result{} 12 +@end smallexample + +But beware, this will not work unless @var{string} is the +array itself, not a pointer to it. 
For example: + +@smallexample +char string[32] = "hello, world"; +char *ptr = string; +sizeof (string) + @result{} 32 +sizeof (ptr) + @result{} 4 /* @r{(on a machine with 4 byte pointers)} */ +@end smallexample + +This is an easy mistake to make when you are working with functions that +take string arguments; those arguments are always pointers, not arrays. + +It must also be noted that for multibyte encoded strings the return +value does not have to correspond to the number of characters in the +string. To get this value the string can be converted to wide +characters and @code{wcslen} can be used or something like the following +code can be used: + +@smallexample +/* @r{The input is in @code{string}.} + @r{The length is expected in @code{n}.} */ +@{ + mbstate_t t; + char *scopy = string; + /* In initial state. */ + memset (&t, '\0', sizeof (t)); + /* Determine number of characters. */ + n = mbsrtowcs (NULL, &scopy, strlen (scopy), &t); +@} +@end smallexample + +This is cumbersome to do, so if the number of characters (as opposed to +bytes) is needed often it is better to work with wide characters. +@end deftypefun + +The wide character equivalent is declared in @file{wchar.h}. + +@comment wchar.h +@comment ISO +@deftypefun size_t wcslen (const wchar_t *@var{ws}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wcslen} function is the wide character equivalent to +@code{strlen}. The return value is the number of wide characters in the +wide string pointed to by @var{ws} (this is also the offset of +the terminating null wide character of @var{ws}). + +Since there are no multi wide character sequences making up one wide +character the return value is not only the offset in the array, it is +also the number of wide characters. + +This function was introduced in @w{Amendment 1} to @w{ISO C90}. 
+@end deftypefun + +@comment string.h +@comment GNU +@deftypefun size_t strnlen (const char *@var{s}, size_t @var{maxlen}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +If the array @var{s} of size @var{maxlen} contains a null byte, +the @code{strnlen} function returns the length of the string @var{s} in +bytes. Otherwise it +returns @var{maxlen}. Therefore this function is equivalent to +@code{(strlen (@var{s}) < @var{maxlen} ? strlen (@var{s}) : @var{maxlen})} +but it +is more efficient and works even if @var{s} is not null-terminated so +long as @var{maxlen} does not exceed the size of @var{s}'s array. + +@smallexample +char string[32] = "hello, world"; +strnlen (string, 32) + @result{} 12 +strnlen (string, 5) + @result{} 5 +@end smallexample + +This function is a GNU extension and is declared in @file{string.h}. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun size_t wcsnlen (const wchar_t *@var{ws}, size_t @var{maxlen}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{wcsnlen} is the wide character equivalent to @code{strnlen}. The +@var{maxlen} parameter specifies the maximum number of wide characters. + +This function is a GNU extension and is declared in @file{wchar.h}. +@end deftypefun + +@node Copying Strings and Arrays +@section Copying Strings and Arrays + +You can use the functions described in this section to copy the contents +of strings, wide strings, and arrays. The @samp{str} and @samp{mem} +functions are declared in @file{string.h} while the @samp{w} functions +are declared in @file{wchar.h}. +@pindex string.h +@pindex wchar.h +@cindex copying strings and arrays +@cindex string copy functions +@cindex array copy functions +@cindex concatenating strings +@cindex string concatenation functions + +A helpful way to remember the ordering of the arguments to the functions +in this section is that it corresponds to an assignment expression, with +the destination array specified to the left of the source array. 
Most +of these functions return the address of the destination array; a few +return the address of the destination's terminating null, or of just +past the destination. + +Most of these functions do not work properly if the source and +destination arrays overlap. For example, if the beginning of the +destination array overlaps the end of the source array, the original +contents of that part of the source array may get overwritten before it +is copied. Even worse, in the case of the string functions, the null +byte marking the end of the string may be lost, and the copy +function might get stuck in a loop trashing all the memory allocated to +your program. + +All functions that have problems copying between overlapping arrays are +explicitly identified in this manual. In addition to functions in this +section, there are a few others like @code{sprintf} (@pxref{Formatted +Output Functions}) and @code{scanf} (@pxref{Formatted Input +Functions}). + +@comment string.h +@comment ISO +@deftypefun {void *} memcpy (void *restrict @var{to}, const void *restrict @var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{memcpy} function copies @var{size} bytes from the object +beginning at @var{from} into the object beginning at @var{to}. The +behavior of this function is undefined if the two arrays @var{to} and +@var{from} overlap; use @code{memmove} instead if overlapping is possible. + +The value returned by @code{memcpy} is the value of @var{to}. 
+ +Here is an example of how you might use @code{memcpy} to copy the +contents of an array: + +@smallexample +struct foo *oldarray, *newarray; +int arraysize; +@dots{} +memcpy (newarray, oldarray, arraysize * sizeof (struct foo)); +@end smallexample +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wmemcpy (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wmemcpy} function copies @var{size} wide characters from the object +beginning at @var{wfrom} into the object beginning at @var{wto}. The +behavior of this function is undefined if the two arrays @var{wto} and +@var{wfrom} overlap; use @code{wmemmove} instead if overlapping is possible. + +The following is a possible implementation of @code{wmemcpy} but there +are more optimizations possible. + +@smallexample +wchar_t * +wmemcpy (wchar_t *restrict wto, const wchar_t *restrict wfrom, + size_t size) +@{ + return (wchar_t *) memcpy (wto, wfrom, size * sizeof (wchar_t)); +@} +@end smallexample + +The value returned by @code{wmemcpy} is the value of @var{wto}. + +This function was introduced in @w{Amendment 1} to @w{ISO C90}. +@end deftypefun + +@comment string.h +@comment GNU +@deftypefun {void *} mempcpy (void *restrict @var{to}, const void *restrict @var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{mempcpy} function is nearly identical to the @code{memcpy} +function. It copies @var{size} bytes from the object beginning at +@var{from} into the object pointed to by @var{to}. But instead of +returning the value of @var{to} it returns a pointer to the byte +following the last written byte in the object beginning at @var{to}. +I.e., the value is @code{((void *) ((char *) @var{to} + @var{size}))}. + +This function is useful in situations where a number of objects shall be +copied to consecutive memory positions. 
+ +@smallexample +void * +combine (void *o1, size_t s1, void *o2, size_t s2) +@{ + void *result = malloc (s1 + s2); + if (result != NULL) + mempcpy (mempcpy (result, o1, s1), o2, s2); + return result; +@} +@end smallexample + +This function is a GNU extension. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {wchar_t *} wmempcpy (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wmempcpy} function is nearly identical to the @code{wmemcpy} +function. It copies @var{size} wide characters from the object +beginning at @var{wfrom} into the object pointed to by @var{wto}. But +instead of returning the value of @var{wto} it returns a pointer to the +wide character following the last written wide character in the object +beginning at @var{wto}. I.e., the value is @code{@var{wto} + @var{size}}. + +This function is useful in situations where a number of objects shall be +copied to consecutive memory positions. + +The following is a possible implementation of @code{wmempcpy} but there +are more optimizations possible. + +@smallexample +wchar_t * +wmempcpy (wchar_t *restrict wto, const wchar_t *restrict wfrom, + size_t size) +@{ + return (wchar_t *) mempcpy (wto, wfrom, size * sizeof (wchar_t)); +@} +@end smallexample + +This function is a GNU extension. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {void *} memmove (void *@var{to}, const void *@var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{memmove} copies the @var{size} bytes at @var{from} into the +@var{size} bytes at @var{to}, even if those two blocks of space +overlap. In the case of overlap, @code{memmove} is careful to copy the +original values of the bytes in the block at @var{from}, including those +bytes which also belong to the block at @var{to}. + +The value returned by @code{memmove} is the value of @var{to}. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wmemmove (wchar_t *@var{wto}, const wchar_t *@var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{wmemmove} copies the @var{size} wide characters at @var{wfrom} +into the @var{size} wide characters at @var{wto}, even if those two +blocks of space overlap. In the case of overlap, @code{wmemmove} is +careful to copy the original values of the wide characters in the block +at @var{wfrom}, including those wide characters which also belong to the +block at @var{wto}. + +The following is a possible implementation of @code{wmemmove} but there +are more optimizations possible. + +@smallexample +wchar_t * +wmemmove (wchar_t *wto, const wchar_t *wfrom, + size_t size) +@{ + return (wchar_t *) memmove (wto, wfrom, size * sizeof (wchar_t)); +@} +@end smallexample + +The value returned by @code{wmemmove} is the value of @var{wto}. + +This function was introduced in @w{Amendment 1} to @w{ISO C90}. +@end deftypefun + +@comment string.h +@comment SVID +@deftypefun {void *} memccpy (void *restrict @var{to}, const void *restrict @var{from}, int @var{c}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function copies no more than @var{size} bytes from @var{from} to +@var{to}, stopping if a byte matching @var{c} is found. The return +value is a pointer into @var{to} one byte past where @var{c} was copied, +or a null pointer if no byte matching @var{c} appeared in the first +@var{size} bytes of @var{from}. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {void *} memset (void *@var{block}, int @var{c}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function copies the value of @var{c} (converted to an +@code{unsigned char}) into each of the first @var{size} bytes of the +object beginning at @var{block}. It returns the value of @var{block}. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wmemset (wchar_t *@var{block}, wchar_t @var{wc}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function copies the value of @var{wc} into each of the first +@var{size} wide characters of the object beginning at @var{block}. It +returns the value of @var{block}. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {char *} strcpy (char *restrict @var{to}, const char *restrict @var{from}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This copies bytes from the string @var{from} (up to and including +the terminating null byte) into the string @var{to}. Like +@code{memcpy}, this function has undefined results if the strings +overlap. The return value is the value of @var{to}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcscpy (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This copies wide characters from the wide string @var{wfrom} (up to and +including the terminating null wide character) into the string +@var{wto}. Like @code{wmemcpy}, this function has undefined results if +the strings overlap. The return value is the value of @var{wto}. +@end deftypefun + +@comment SVID +@deftypefun {char *} strdup (const char *@var{s}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function copies the string @var{s} into a newly +allocated string. The string is allocated using @code{malloc}; see +@ref{Unconstrained Allocation}. If @code{malloc} cannot allocate space +for the new string, @code{strdup} returns a null pointer. Otherwise it +returns a pointer to the new string. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {wchar_t *} wcsdup (const wchar_t *@var{ws}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function copies the wide string @var{ws} +into a newly allocated string. 
The string is allocated using +@code{malloc}; see @ref{Unconstrained Allocation}. If @code{malloc} +cannot allocate space for the new string, @code{wcsdup} returns a null +pointer. Otherwise it returns a pointer to the new wide string. + +This function is a GNU extension. +@end deftypefun + +@comment string.h +@comment Unknown origin +@deftypefun {char *} stpcpy (char *restrict @var{to}, const char *restrict @var{from}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is like @code{strcpy}, except that it returns a pointer to +the end of the string @var{to} (that is, the address of the terminating +null byte @code{to + strlen (from)}) rather than the beginning. + +For example, this program uses @code{stpcpy} to concatenate @samp{foo} +and @samp{bar} to produce @samp{foobar}, which it then prints. + +@smallexample +@include stpcpy.c.texi +@end smallexample + +This function is part of POSIX.1-2008 and later editions, but was +available in @theglibc{} and other systems as an extension long before +it was standardized. + +Its behavior is undefined if the strings overlap. The function is +declared in @file{string.h}. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {wchar_t *} wcpcpy (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is like @code{wcscpy}, except that it returns a pointer to +the end of the string @var{wto} (that is, the address of the terminating +null wide character @code{wto + wcslen (wfrom)}) rather than the beginning. + +This function is not part of ISO or POSIX but was found useful while +developing @theglibc{} itself. + +The behavior of @code{wcpcpy} is undefined if the strings overlap. + +@code{wcpcpy} is a GNU extension and is declared in @file{wchar.h}. 
+@end deftypefun + +@comment string.h +@comment GNU +@deftypefn {Macro} {char *} strdupa (const char *@var{s}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This macro is similar to @code{strdup} but allocates the new string +using @code{alloca} instead of @code{malloc} (@pxref{Variable Size +Automatic}). This means of course the returned string has the same +limitations as any block of memory allocated using @code{alloca}. + +For obvious reasons @code{strdupa} is implemented only as a macro; +you cannot get the address of this function. Despite this limitation +it is a useful function. The following code shows a situation where +using @code{malloc} would be a lot more expensive. + +@smallexample +@include strdupa.c.texi +@end smallexample + +Please note that calling @code{strtok} using @var{path} directly is +invalid. It is also not allowed to call @code{strdupa} in the argument +list of @code{strtok} since @code{strdupa} uses @code{alloca} +(@pxref{Variable Size Automatic}), which can interfere with the parameter +passing. + +This function is only available if GNU CC is used. +@end deftypefn + +@comment string.h +@comment BSD +@deftypefun void bcopy (const void *@var{from}, void *@var{to}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is a partially obsolete alternative for @code{memmove}, derived from +BSD. Note that it is not quite equivalent to @code{memmove}, because the +arguments are not in the same order and there is no return value. +@end deftypefun + +@comment string.h +@comment BSD +@deftypefun void bzero (void *@var{block}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is a partially obsolete alternative for @code{memset}, derived from +BSD. Note that it is not as general as @code{memset}, because the only +value it can store is zero. 
+@end deftypefun + +@node Concatenating Strings +@section Concatenating Strings +@pindex string.h +@pindex wchar.h +@cindex concatenating strings +@cindex string concatenation functions + +The functions described in this section concatenate the contents of a +string or wide string to another. They follow the string-copying +functions in their conventions. @xref{Copying Strings and Arrays}. +@samp{strcat} is declared in the header file @file{string.h} while +@samp{wcscat} is declared in @file{wchar.h}. + +@comment string.h +@comment ISO +@deftypefun {char *} strcat (char *restrict @var{to}, const char *restrict @var{from}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strcat} function is similar to @code{strcpy}, except that the +bytes from @var{from} are concatenated or appended to the end of +@var{to}, instead of overwriting it. That is, the first byte from +@var{from} overwrites the null byte marking the end of @var{to}. + +An equivalent definition for @code{strcat} would be: + +@smallexample +char * +strcat (char *restrict to, const char *restrict from) +@{ + strcpy (to + strlen (to), from); + return to; +@} +@end smallexample + +This function has undefined results if the strings overlap. + +As noted below, this function has significant performance issues. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcscat (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wcscat} function is similar to @code{wcscpy}, except that the +wide characters from @var{wfrom} are concatenated or appended to the end of +@var{wto}, instead of overwriting it. That is, the first wide character from +@var{wfrom} overwrites the null wide character marking the end of @var{wto}. 
+ +An equivalent definition for @code{wcscat} would be: + +@smallexample +wchar_t * +wcscat (wchar_t *wto, const wchar_t *wfrom) +@{ + wcscpy (wto + wcslen (wto), wfrom); + return wto; +@} +@end smallexample + +This function has undefined results if the strings overlap. + +As noted below, this function has significant performance issues. +@end deftypefun + +Programmers using the @code{strcat} or @code{wcscat} function (or the +@code{strncat} or @code{wcsncat} functions defined in +a later section, for that matter) +can easily be recognized as lazy and reckless. In almost all situations +the lengths of the participating strings are known (they had better be, +since how can one otherwise ensure the allocated size of the buffer is +sufficient?) Or at least, one could know them if one keeps track of the +results of the various function calls. But then it is very inefficient +to use @code{strcat}/@code{wcscat}. A lot of time is wasted finding the +end of the destination string so that the actual copying can start. +This is a common example: + +@cindex va_copy +@smallexample +/* @r{This function concatenates arbitrarily many strings. The last} + @r{parameter must be @code{NULL}.} */ +char * +concat (const char *str, @dots{}) +@{ + va_list ap, ap2; + size_t total = 1; + const char *s; + char *result; + + va_start (ap, str); + va_copy (ap2, ap); + + /* @r{Determine how much space we need.} */ + for (s = str; s != NULL; s = va_arg (ap, const char *)) + total += strlen (s); + + va_end (ap); + + result = (char *) malloc (total); + if (result != NULL) + @{ + result[0] = '\0'; + + /* @r{Copy the strings.} */ + for (s = str; s != NULL; s = va_arg (ap2, const char *)) + strcat (result, s); + @} + + va_end (ap2); + + return result; +@} +@end smallexample + +This looks quite simple, especially the second loop where the strings +are actually copied. But these innocent lines hide a major performance +penalty. Just imagine that ten strings of 100 bytes each have to be +concatenated. 
For the second string we search the already stored 100 +bytes for the end of the string so that we can append the next string. +For all strings in total the comparisons necessary to find the end of +the intermediate results sums up to 5500! If we combine the copying +with the search for the allocation we can write this function more +efficiently: + +@smallexample +char * +concat (const char *str, @dots{}) +@{ + va_list ap; + size_t allocated = 100; + char *result = (char *) malloc (allocated); + + if (result != NULL) + @{ + char *newp; + char *wp; + const char *s; + + va_start (ap, str); + + wp = result; + for (s = str; s != NULL; s = va_arg (ap, const char *)) + @{ + size_t len = strlen (s); + + /* @r{Resize the allocated memory if necessary.} */ + if (wp + len + 1 > result + allocated) + @{ + allocated = (allocated + len) * 2; + newp = (char *) realloc (result, allocated); + if (newp == NULL) + @{ + free (result); + return NULL; + @} + wp = newp + (wp - result); + result = newp; + @} + + wp = mempcpy (wp, s, len); + @} + + /* @r{Terminate the result string.} */ + *wp++ = '\0'; + + /* @r{Resize memory to the optimal size.} */ + newp = realloc (result, wp - result); + if (newp != NULL) + result = newp; + + va_end (ap); + @} + + return result; +@} +@end smallexample + +With a bit more knowledge about the input strings one could fine-tune +the memory allocation. The difference we are pointing to here is that +we don't use @code{strcat} anymore. We always keep track of the length +of the current intermediate result so we can save ourselves the search for the +end of the string and use @code{mempcpy}. Please note that we also +don't use @code{stpcpy} which might seem more natural since we are handling +strings. But this is not necessary since we already know the +length of the string and therefore can use the faster memory copying +function. The example would work for wide characters the same way. 
+ +Whenever a programmer feels the need to use @code{strcat} she or he +should think twice and look through the program to see whether the code cannot +be rewritten to take advantage of already calculated results. Again: it +is almost always unnecessary to use @code{strcat}. + +@node Truncating Strings +@section Truncating Strings while Copying +@cindex truncating strings +@cindex string truncation + +The functions described in this section copy or concatenate the +possibly-truncated contents of a string or array to another, and +similarly for wide strings. They follow the string-copying functions +in their header conventions. @xref{Copying Strings and Arrays}. The +@samp{str} functions are declared in the header file @file{string.h} +and the @samp{wc} functions are declared in the file @file{wchar.h}. + +@comment string.h +@deftypefun {char *} strncpy (char *restrict @var{to}, const char *restrict @var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{strcpy} but always copies exactly +@var{size} bytes into @var{to}. + +If @var{from} does not contain a null byte in its first @var{size} +bytes, @code{strncpy} copies just the first @var{size} bytes. In this +case no null terminator is written into @var{to}. + +Otherwise @var{from} must be a string with length less than +@var{size}. In this case @code{strncpy} copies all of @var{from}, +followed by enough null bytes to add up to @var{size} bytes in all. + +The behavior of @code{strncpy} is undefined if the strings overlap. + +This function was designed for now-rarely-used arrays consisting of +non-null bytes followed by zero or more null bytes. It needs to set +all @var{size} bytes of the destination, even when @var{size} is much +greater than the length of @var{from}. As noted below, this function +is generally a poor choice for processing text. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcsncpy (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{wcscpy} but always copies exactly +@var{size} wide characters into @var{wto}. + +If @var{wfrom} does not contain a null wide character in its first +@var{size} wide characters, then @code{wcsncpy} copies just the first +@var{size} wide characters. In this case no null terminator is +written into @var{wto}. + +Otherwise @var{wfrom} must be a wide string with length less than +@var{size}. In this case @code{wcsncpy} copies all of @var{wfrom}, +followed by enough null wide characters to add up to @var{size} wide +characters in all. + +The behavior of @code{wcsncpy} is undefined if the strings overlap. + +This function is the wide-character counterpart of @code{strncpy} and +suffers from most of the problems that @code{strncpy} does. For +example, as noted below, this function is generally a poor choice for +processing text. +@end deftypefun + +@comment string.h +@comment GNU +@deftypefun {char *} strndup (const char *@var{s}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +This function is similar to @code{strdup} but always copies at most +@var{size} bytes into the newly allocated string. + +If the length of @var{s} is more than @var{size}, then @code{strndup} +copies just the first @var{size} bytes and adds a closing null byte. +Otherwise all bytes are copied and the string is terminated. + +This function differs from @code{strncpy} in that it always terminates +the destination string. + +As noted below, this function is generally a poor choice for +processing text. + +@code{strndup} is a GNU extension. 
+@end deftypefun + +@comment string.h +@comment GNU +@deftypefn {Macro} {char *} strndupa (const char *@var{s}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{strndup} but like @code{strdupa} it +allocates the new string using @code{alloca} (@pxref{Variable Size +Automatic}). The same advantages and limitations of @code{strdupa} are +valid for @code{strndupa}, too. + +This function is implemented only as a macro, just like @code{strdupa}. +Just as @code{strdupa} this macro also must not be used inside the +parameter list in a function call. + +As noted below, this function is generally a poor choice for +processing text. + +@code{strndupa} is only available if GNU CC is used. +@end deftypefn + +@comment string.h +@comment GNU +@deftypefun {char *} stpncpy (char *restrict @var{to}, const char *restrict @var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{stpcpy} but always copies exactly +@var{size} bytes into @var{to}. + +If the length of @var{from} is more than @var{size}, then @code{stpncpy} +copies just the first @var{size} bytes and returns a pointer to the +byte directly following the one which was copied last. Note that in +this case there is no null terminator written into @var{to}. + +If the length of @var{from} is less than @var{size}, then @code{stpncpy} +copies all of @var{from}, followed by enough null bytes to add up +to @var{size} bytes in all. This behavior is rarely useful, but it +is implemented to be useful in contexts where this behavior of +@code{strncpy} is used. @code{stpncpy} returns a pointer to the +@emph{first} written null byte. + +This function is not part of ISO or POSIX but was found useful while +developing @theglibc{} itself. + +Its behavior is undefined if the strings overlap. The function is +declared in @file{string.h}. + +As noted below, this function is generally a poor choice for +processing text. 
+@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {wchar_t *} wcpncpy (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{wcpcpy} but always copies exactly +@var{size} wide characters into @var{wto}. + +If the length of @var{wfrom} is more than @var{size}, then +@code{wcpncpy} copies just the first @var{size} wide characters and +returns a pointer to the wide character directly following the last +non-null wide character which was copied. Note that in this case +there is no null terminator written into @var{wto}. + +If the length of @var{wfrom} is less than @var{size}, then @code{wcpncpy} +copies all of @var{wfrom}, followed by enough null wide characters to add up +to @var{size} wide characters in all. This behavior is rarely useful, but it +is implemented to be useful in contexts where this behavior of +@code{wcsncpy} is used. @code{wcpncpy} returns a pointer to the +@emph{first} written null wide character. + +This function is not part of ISO or POSIX but was found useful while +developing @theglibc{} itself. + +Its behavior is undefined if the strings overlap. + +As noted below, this function is generally a poor choice for +processing text. + +@code{wcpncpy} is a GNU extension. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {char *} strncat (char *restrict @var{to}, const char *restrict @var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is like @code{strcat} except that not more than @var{size} +bytes from @var{from} are appended to the end of @var{to}, and +@var{from} need not be null-terminated. A single null byte is also +always appended to @var{to}, so the total +allocated size of @var{to} must be at least @code{@var{size} + 1} bytes +longer than its initial length. 
+ +The @code{strncat} function could be implemented like this: + +@smallexample +@group +char * +strncat (char *to, const char *from, size_t size) +@{ + size_t len = strlen (to); + memcpy (to + len, from, strnlen (from, size)); + to[len + strnlen (from, size)] = '\0'; + return to; +@} +@end group +@end smallexample + +The behavior of @code{strncat} is undefined if the strings overlap. + +As a companion to @code{strncpy}, @code{strncat} was designed for +now-rarely-used arrays consisting of non-null bytes followed by zero +or more null bytes. As noted below, this function is generally a poor +choice for processing text. Also, this function has significant +performance issues. @xref{Concatenating Strings}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcsncat (wchar_t *restrict @var{wto}, const wchar_t *restrict @var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is like @code{wcscat} except that not more than @var{size} +wide characters from @var{wfrom} are appended to the end of @var{wto}, +and @var{wfrom} need not be null-terminated. A single null wide +character is also always appended to @var{wto}, so the total allocated +size of @var{wto} must be at least @code{wcsnlen (@var{wfrom}, +@var{size}) + 1} wide characters longer than its initial length. + +The @code{wcsncat} function could be implemented like this: + +@smallexample +@group +wchar_t * +wcsncat (wchar_t *restrict wto, const wchar_t *restrict wfrom, + size_t size) +@{ + size_t len = wcslen (wto); + memcpy (wto + len, wfrom, wcsnlen (wfrom, size) * sizeof (wchar_t)); + wto[len + wcsnlen (wfrom, size)] = L'\0'; + return wto; +@} +@end group +@end smallexample + +The behavior of @code{wcsncat} is undefined if the strings overlap. + +As noted below, this function is generally a poor choice for +processing text. Also, this function has significant performance +issues. @xref{Concatenating Strings}. 
+@end deftypefun + +Because these functions can abruptly truncate strings or wide strings, +they are generally poor choices for processing text. When copying or +concatenating multibyte strings, they can truncate within a multibyte +character so that the result is not a valid multibyte string. When +combining or concatenating multibyte or wide strings, they may +truncate the output after a combining character, resulting in a +corrupted grapheme. They can cause bugs even when processing +single-byte strings: for example, when calculating an ASCII-only user +name, a truncated name can identify the wrong user. + +Although some buffer overruns can be prevented by manually replacing +calls to copying functions with calls to truncation functions, there +are often easier and safer automatic techniques that cause buffer +overruns to reliably terminate a program, such as GCC's +@option{-fcheck-pointer-bounds} and @option{-fsanitize=address} +options. @xref{Debugging Options,, Options for Debugging Your Program +or GCC, gcc.info, Using GCC}. Because truncation functions can mask +application bugs that would otherwise be caught by the automatic +techniques, these functions should be used only when the application's +underlying logic requires truncation. + +@strong{Note:} GNU programs should not truncate strings or wide +strings to fit arbitrary size limits. @xref{Semantics, , Writing +Robust Programs, standards, The GNU Coding Standards}. Instead of +string-truncation functions, it is usually better to use dynamic +memory allocation (@pxref{Unconstrained Allocation}) and functions +such as @code{strdup} or @code{asprintf} to construct strings. 
+ +@node String/Array Comparison +@section String/Array Comparison +@cindex comparing strings and arrays +@cindex string comparison functions +@cindex array comparison functions +@cindex predicates on strings +@cindex predicates on arrays + +You can use the functions in this section to perform comparisons on the +contents of strings and arrays. As well as checking for equality, these +functions can also be used as the ordering functions for sorting +operations. @xref{Searching and Sorting}, for an example of this. + +Unlike most comparison operations in C, the string comparison functions +return a nonzero value if the strings are @emph{not} equivalent rather +than if they are. The sign of the value indicates the relative ordering +of the first part of the strings that are not equivalent: a +negative value indicates that the first string is ``less'' than the +second, while a positive value indicates that the first string is +``greater''. + +The most common use of these functions is to check only for equality. +This is canonically done with an expression like @w{@samp{! strcmp (s1, s2)}}. + +All of these functions are declared in the header file @file{string.h}. +@pindex string.h + +@comment string.h +@comment ISO +@deftypefun int memcmp (const void *@var{a1}, const void *@var{a2}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{memcmp} compares the @var{size} bytes of memory +beginning at @var{a1} against the @var{size} bytes of memory beginning +at @var{a2}. The value returned has the same sign as the difference +between the first differing pair of bytes (interpreted as @code{unsigned +char} objects, then promoted to @code{int}). + +If the contents of the two blocks are equal, @code{memcmp} returns +@code{0}. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int wmemcmp (const wchar_t *@var{a1}, const wchar_t *@var{a2}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{wmemcmp} compares the @var{size} wide characters +beginning at @var{a1} against the @var{size} wide characters beginning +at @var{a2}. The value returned is smaller than or larger than zero +depending on whether the first differing wide character in @var{a1} is +smaller or larger than the corresponding wide character in @var{a2}. + +If the contents of the two blocks are equal, @code{wmemcmp} returns +@code{0}. +@end deftypefun + +On arbitrary arrays, the @code{memcmp} function is mostly useful for +testing equality. It usually isn't meaningful to do byte-wise ordering +comparisons on arrays of things other than bytes. For example, a +byte-wise comparison on the bytes that make up floating-point numbers +isn't likely to tell you anything about the relationship between the +values of the floating-point numbers. + +@code{wmemcmp} is really only useful to compare arrays of type +@code{wchar_t} since the function looks at @code{sizeof (wchar_t)} bytes +at a time and this number of bytes is system dependent. + +You should also be careful about using @code{memcmp} to compare objects +that can contain ``holes'', such as the padding inserted into structure +objects to enforce alignment requirements, extra space at the end of +unions, and extra bytes at the ends of strings whose length is less +than their allocated size. The contents of these ``holes'' are +indeterminate and may cause strange behavior when performing byte-wise +comparisons. For more predictable results, perform an explicit +component-wise comparison. 
+ +For example, given a structure type definition like: + +@smallexample +struct foo + @{ + unsigned char tag; + union + @{ + double f; + long i; + char *p; + @} value; + @}; +@end smallexample + +@noindent +you are better off writing a specialized comparison function to compare +@code{struct foo} objects instead of comparing them with @code{memcmp}. + +@comment string.h +@comment ISO +@deftypefun int strcmp (const char *@var{s1}, const char *@var{s2}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strcmp} function compares the string @var{s1} against +@var{s2}, returning a value that has the same sign as the difference +between the first differing pair of bytes (interpreted as +@code{unsigned char} objects, then promoted to @code{int}). + +If the two strings are equal, @code{strcmp} returns @code{0}. + +A consequence of the ordering used by @code{strcmp} is that if @var{s1} +is an initial substring of @var{s2}, then @var{s1} is considered to be +``less than'' @var{s2}. + +@code{strcmp} does not take sorting conventions of the language the +strings are written in into account. To get that one has to use +@code{strcoll}. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int wcscmp (const wchar_t *@var{ws1}, const wchar_t *@var{ws2}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +The @code{wcscmp} function compares the wide string @var{ws1} +against @var{ws2}. The value returned is smaller than or larger than zero +depending on whether the first differing wide character in @var{ws1} is +smaller or larger than the corresponding wide character in @var{ws2}. + +If the two strings are equal, @code{wcscmp} returns @code{0}. + +A consequence of the ordering used by @code{wcscmp} is that if @var{ws1} +is an initial substring of @var{ws2}, then @var{ws1} is considered to be +``less than'' @var{ws2}. + +@code{wcscmp} does not take sorting conventions of the language the +strings are written in into account. 
To get that one has to use +@code{wcscoll}. +@end deftypefun + +@comment string.h +@comment BSD +@deftypefun int strcasecmp (const char *@var{s1}, const char *@var{s2}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Although this calls tolower multiple times, it's a macro, and +@c strcasecmp is optimized so that the locale pointer is read only once. +@c There are some asm implementations too, for which the single-read +@c from locale TLS pointers also applies. +This function is like @code{strcmp}, except that differences in case are +ignored, and its arguments must be multibyte strings. +How uppercase and lowercase characters are related is +determined by the currently selected locale. In the standard @code{"C"} +locale the characters @"A and @"a do not match but in a locale which +regards these characters as parts of the alphabet they do match. + +@noindent +@code{strcasecmp} is derived from BSD. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun int wcscasecmp (const wchar_t *@var{ws1}, const wchar_t *@var{ws2}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Since towlower is not a macro, the locale object may be read multiple +@c times. +This function is like @code{wcscmp}, except that differences in case are +ignored. How uppercase and lowercase characters are related is +determined by the currently selected locale. In the standard @code{"C"} +locale the characters @"A and @"a do not match but in a locale which +regards these characters as parts of the alphabet they do match. + +@noindent +@code{wcscasecmp} is a GNU extension. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun int strncmp (const char *@var{s1}, const char *@var{s2}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{strcmp}, except that no more than +@var{size} bytes are compared. 
In other words, if the two +strings are the same in their first @var{size} bytes, the +return value is zero. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int wcsncmp (const wchar_t *@var{ws1}, const wchar_t *@var{ws2}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function is similar to @code{wcscmp}, except that no more than +@var{size} wide characters are compared. In other words, if the two +strings are the same in their first @var{size} wide characters, the +return value is zero. +@end deftypefun + +@comment string.h +@comment BSD +@deftypefun int strncasecmp (const char *@var{s1}, const char *@var{s2}, size_t @var{n}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function is like @code{strncmp}, except that differences in case +are ignored, and the compared parts of the arguments should consist of +valid multibyte characters. +Like @code{strcasecmp}, it is locale dependent how +uppercase and lowercase characters are related. + +@noindent +@code{strncasecmp} is derived from BSD. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun int wcsncasecmp (const wchar_t *@var{ws1}, const wchar_t *@var{ws2}, size_t @var{n}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +This function is like @code{wcsncmp}, except that differences in case +are ignored. Like @code{wcscasecmp}, it is locale dependent how +uppercase and lowercase characters are related. + +@noindent +@code{wcsncasecmp} is a GNU extension. +@end deftypefun + +Here are some examples showing the use of @code{strcmp} and +@code{strncmp} (equivalent examples can be constructed for the wide +character functions). These examples assume the use of the ASCII +character set. (If some other character set---say, EBCDIC---is used +instead, then the glyphs are associated with different numeric codes, +and the return values and ordering may differ.) 
+ +@smallexample +strcmp ("hello", "hello") + @result{} 0 /* @r{These two strings are the same.} */ +strcmp ("hello", "Hello") + @result{} 32 /* @r{Comparisons are case-sensitive.} */ +strcmp ("hello", "world") + @result{} -15 /* @r{The byte @code{'h'} comes before @code{'w'}.} */ +strcmp ("hello", "hello, world") + @result{} -44 /* @r{Comparing a null byte against a comma.} */ +strncmp ("hello", "hello, world", 5) + @result{} 0 /* @r{The initial 5 bytes are the same.} */ +strncmp ("hello, world", "hello, stupid world!!!", 5) + @result{} 0 /* @r{The initial 5 bytes are the same.} */ +@end smallexample + +@comment string.h +@comment GNU +@deftypefun int strverscmp (const char *@var{s1}, const char *@var{s2}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c Calls isdigit multiple times, locale may change in between. +The @code{strverscmp} function compares the string @var{s1} against +@var{s2}, considering them as holding indices/version numbers. The +return value follows the same conventions as found in the +@code{strcmp} function. In fact, if @var{s1} and @var{s2} contain no +digits, @code{strverscmp} behaves like @code{strcmp} +(in the sense that the sign of the result is the same). + +The comparison algorithm which the @code{strverscmp} function implements +differs slightly from other version-comparison algorithms. The +implementation is based on a finite-state machine, whose behavior is +approximated below. + +@itemize @bullet +@item +The input strings are each split into sequences of non-digits and +digits. These sequences can be empty at the beginning and end of the +string. Digits are determined by the @code{isdigit} function and are +thus subject to the current locale. + +@item +Comparison starts with a (possibly empty) non-digit sequence. The first +non-equal sequences of non-digits or digits determines the outcome of +the comparison. 
+ +@item +Corresponding non-digit sequences in both strings are compared +lexicographically if their lengths are equal. If the lengths differ, +the shorter non-digit sequence is extended with the input string +character immediately following it (which may be the null terminator), +the other sequence is truncated to be of the same (extended) length, and +these two sequences are compared lexicographically. In the last case, +the sequence comparison determines the result of the function because +the extension character (or some character before it) is necessarily +different from the character at the same offset in the other input +string. + +@item +For two sequences of digits, the number of leading zeros is counted (which +can be zero). If the count differs, the string with more leading zeros +in the digit sequence is considered smaller than the other string. + +@item +If the two sequences of digits have no leading zeros, they are compared +as integers, that is, the string with the longer digit sequence is +deemed larger, and if both sequences are of equal length, they are +compared lexicographically. + +@item +If both digit sequences start with a zero and have an equal number of +leading zeros, they are compared lexicographically if their lengths are +the same. If the lengths differ, the shorter sequence is extended with +the following character in its input string, and the other sequence is +truncated to the same length, and both sequences are compared +lexicographically (similar to the non-digit sequence case above). +@end itemize + +The treatment of leading zeros and the tie-breaking extension characters +(which in effect propagate across non-digit/digit sequence boundaries) +differs from other version-comparison algorithms. 
+ +@smallexample +strverscmp ("no digit", "no digit") + @result{} 0 /* @r{same behavior as strcmp.} */ +strverscmp ("item#99", "item#100") + @result{} <0 /* @r{same prefix, but 99 < 100.} */ +strverscmp ("alpha1", "alpha001") + @result{} >0 /* @r{different number of leading zeros (0 and 2).} */ +strverscmp ("part1_f012", "part1_f01") + @result{} >0 /* @r{lexicographical comparison with leading zeros.} */ +strverscmp ("foo.009", "foo.0") + @result{} <0 /* @r{different number of leading zeros (2 and 1).} */ +@end smallexample + +@code{strverscmp} is a GNU extension. +@end deftypefun + +@comment string.h +@comment BSD +@deftypefun int bcmp (const void *@var{a1}, const void *@var{a2}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is an obsolete alias for @code{memcmp}, derived from BSD. +@end deftypefun + +@node Collation Functions +@section Collation Functions + +@cindex collating strings +@cindex string collation functions + +In some locales, the conventions for lexicographic ordering differ from +the strict numeric ordering of character codes. For example, in Spanish +most glyphs with diacritical marks such as accents are not considered +distinct letters for the purposes of collation. On the other hand, the +two-character sequence @samp{ll} is treated as a single letter that is +collated immediately after @samp{l}. + +You can use the functions @code{strcoll} and @code{strxfrm} (declared in +the header file @file{string.h}) and @code{wcscoll} and @code{wcsxfrm} +(declared in the header file @file{wchar.h}) to compare strings using a +collation ordering appropriate for the current locale. The locale used +by these functions in particular can be specified by setting the locale +for the @code{LC_COLLATE} category; see @ref{Locales}. +@pindex string.h +@pindex wchar.h + +In the standard C locale, the collation sequence for @code{strcoll} is +the same as that for @code{strcmp}. 
Similarly, @code{wcscoll} and +@code{wcscmp} are the same in this situation. + +Effectively, the way these functions work is by applying a mapping to +transform the characters in a multibyte string to a byte +sequence that represents +the string's position in the collating sequence of the current locale. +Comparing two such byte sequences in a simple fashion is equivalent to +comparing the strings with the locale's collating sequence. + +The functions @code{strcoll} and @code{wcscoll} perform this translation +implicitly, in order to do one comparison. By contrast, @code{strxfrm} +and @code{wcsxfrm} perform the mapping explicitly. If you are making +multiple comparisons using the same string or set of strings, it is +likely to be more efficient to use @code{strxfrm} or @code{wcsxfrm} to +transform all the strings just once, and subsequently compare the +transformed strings with @code{strcmp} or @code{wcscmp}. + +@comment string.h +@comment ISO +@deftypefun int strcoll (const char *@var{s1}, const char *@var{s2}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Calls strcoll_l with the current locale, which dereferences only the +@c LC_COLLATE data pointer. +The @code{strcoll} function is similar to @code{strcmp} but uses the +collating sequence of the current locale for collation (the +@code{LC_COLLATE} locale). The arguments are multibyte strings. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun int wcscoll (const wchar_t *@var{ws1}, const wchar_t *@var{ws2}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Same as strcoll, but calling wcscoll_l. +The @code{wcscoll} function is similar to @code{wcscmp} but uses the +collating sequence of the current locale for collation (the +@code{LC_COLLATE} locale). +@end deftypefun + +Here is an example of sorting an array of strings, using @code{strcoll} +to compare them. 
The actual sort algorithm is not written here; it +comes from @code{qsort} (@pxref{Array Sort Function}). The job of the +code shown here is to say how to compare the strings while sorting them. +(Later on in this section, we will show a way to do this more +efficiently using @code{strxfrm}.) + +@smallexample +/* @r{This is the comparison function used with @code{qsort}.} */ + +int +compare_elements (const void *v1, const void *v2) +@{ + char * const *p1 = v1; + char * const *p2 = v2; + + return strcoll (*p1, *p2); +@} + +/* @r{This is the entry point---the function to sort} + @r{strings using the locale's collating sequence.} */ + +void +sort_strings (char **array, int nstrings) +@{ + /* @r{Sort @code{array} by comparing the strings.} */ + qsort (array, nstrings, + sizeof (char *), compare_elements); +@} +@end smallexample + +@cindex converting string to collation order +@comment string.h +@comment ISO +@deftypefun size_t strxfrm (char *restrict @var{to}, const char *restrict @var{from}, size_t @var{size}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The function @code{strxfrm} transforms the multibyte string +@var{from} using the +collation transformation determined by the locale currently selected for +collation, and stores the transformed string in the array @var{to}. Up +to @var{size} bytes (including a terminating null byte) are +stored. + +The behavior is undefined if the strings @var{to} and @var{from} +overlap; see @ref{Copying Strings and Arrays}. + +The return value is the length of the entire transformed string. This +value is not affected by the value of @var{size}, but if it is greater +than or equal to @var{size}, it means that the transformed string did not +entirely fit in the array @var{to}. In this case, only as much of the +string as actually fits was stored. To get the whole transformed +string, call @code{strxfrm} again with a bigger output array. 
+ +The transformed string may be longer than the original string, and it +may also be shorter. + +If @var{size} is zero, no bytes are stored in @var{to}. In this +case, @code{strxfrm} simply returns the number of bytes that would +be the length of the transformed string. This is useful for determining +what size the allocated array should be. It does not matter what +@var{to} is if @var{size} is zero; @var{to} may even be a null pointer. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun size_t wcsxfrm (wchar_t *restrict @var{wto}, const wchar_t *@var{wfrom}, size_t @var{size}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The function @code{wcsxfrm} transforms wide string @var{wfrom} +using the collation transformation determined by the locale currently +selected for collation, and stores the transformed string in the array +@var{wto}. Up to @var{size} wide characters (including a terminating null +wide character) are stored. + +The behavior is undefined if the strings @var{wto} and @var{wfrom} +overlap; see @ref{Copying Strings and Arrays}. + +The return value is the length of the entire transformed wide +string. This value is not affected by the value of @var{size}, but if +it is greater than or equal to @var{size}, it means that the transformed +wide string did not entirely fit in the array @var{wto}. In +this case, only as much of the wide string as actually fits +was stored. To get the whole transformed wide string, call +@code{wcsxfrm} again with a bigger output array. + +The transformed wide string may be longer than the original +wide string, and it may also be shorter. + +If @var{size} is zero, no wide characters are stored in @var{wto}. In this +case, @code{wcsxfrm} simply returns the number of wide characters that +would be the length of the transformed wide string. This is +useful for determining what size the allocated array should be (remember +to multiply with @code{sizeof (wchar_t)}). 
It does not matter what +@var{wto} is if @var{size} is zero; @var{wto} may even be a null pointer. +@end deftypefun + +Here is an example of how you can use @code{strxfrm} when +you plan to do many comparisons. It does the same thing as the previous +example, but much faster, because it has to transform each string only +once, no matter how many times it is compared with other strings. Even +the time needed to allocate and free storage is much less than the time +we save, when there are many strings. + +@smallexample +struct sorter @{ char *input; char *transformed; @}; + +/* @r{This is the comparison function used with @code{qsort}} + @r{to sort an array of @code{struct sorter}.} */ + +int +compare_elements (const void *v1, const void *v2) +@{ + const struct sorter *p1 = v1; + const struct sorter *p2 = v2; + + return strcmp (p1->transformed, p2->transformed); +@} + +/* @r{This is the entry point---the function to sort} + @r{strings using the locale's collating sequence.} */ + +void +sort_strings_fast (char **array, int nstrings) +@{ + struct sorter temp_array[nstrings]; + int i; + + /* @r{Set up @code{temp_array}. Each element contains} + @r{one input string and its transformed string.} */ + for (i = 0; i < nstrings; i++) + @{ + size_t length = strlen (array[i]) * 2; + char *transformed; + size_t transformed_length; + + temp_array[i].input = array[i]; + + /* @r{First try a buffer perhaps big enough.} */ + transformed = (char *) xmalloc (length); + + /* @r{Transform @code{array[i]}.} */ + transformed_length = strxfrm (transformed, array[i], length); + + /* @r{If the buffer was not large enough, resize it} + @r{and try again.} */ + if (transformed_length >= length) + @{ + /* @r{Allocate the needed space. 
+1 for terminating} + @r{@code{'\0'} byte.} */ + transformed = (char *) xrealloc (transformed, + transformed_length + 1); + + /* @r{The return value is not interesting because we know} + @r{how long the transformed string is.} */ + (void) strxfrm (transformed, array[i], + transformed_length + 1); + @} + + temp_array[i].transformed = transformed; + @} + + /* @r{Sort @code{temp_array} by comparing transformed strings.} */ + qsort (temp_array, nstrings, + sizeof (struct sorter), compare_elements); + + /* @r{Put the elements back in the permanent array} + @r{in their sorted order.} */ + for (i = 0; i < nstrings; i++) + array[i] = temp_array[i].input; + + /* @r{Free the strings we allocated.} */ + for (i = 0; i < nstrings; i++) + free (temp_array[i].transformed); +@} +@end smallexample + +The interesting part of this code for the wide character version would +look like this: + +@smallexample +void +sort_strings_fast (wchar_t **array, int nstrings) +@{ + @dots{} + /* @r{Transform @code{array[i]}.} */ + transformed_length = wcsxfrm (transformed, array[i], length); + + /* @r{If the buffer was not large enough, resize it} + @r{and try again.} */ + if (transformed_length >= length) + @{ + /* @r{Allocate the needed space. +1 for terminating} + @r{@code{L'\0'} wide character.} */ + transformed = (wchar_t *) xrealloc (transformed, + (transformed_length + 1) + * sizeof (wchar_t)); + + /* @r{The return value is not interesting because we know} + @r{how long the transformed string is.} */ + (void) wcsxfrm (transformed, array[i], + transformed_length + 1); + @} + @dots{} +@end smallexample + +@noindent +Note the additional multiplication with @code{sizeof (wchar_t)} in the +@code{realloc} call. + +@strong{Compatibility Note:} The string collation functions are a new +feature of @w{ISO C90}. Older C dialects have no equivalent feature. +The wide character versions were introduced in @w{Amendment 1} to @w{ISO +C90}. 
+ +@node Search Functions +@section Search Functions + +This section describes library functions which perform various kinds +of searching operations on strings and arrays. These functions are +declared in the header file @file{string.h}. +@pindex string.h +@cindex search functions (for strings) +@cindex string search functions + +@comment string.h +@comment ISO +@deftypefun {void *} memchr (const void *@var{block}, int @var{c}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function finds the first occurrence of the byte @var{c} (converted +to an @code{unsigned char}) in the initial @var{size} bytes of the +object beginning at @var{block}. The return value is a pointer to the +located byte, or a null pointer if no match was found. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wmemchr (const wchar_t *@var{block}, wchar_t @var{wc}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function finds the first occurrence of the wide character @var{wc} +in the initial @var{size} wide characters of the object beginning at +@var{block}. The return value is a pointer to the located wide +character, or a null pointer if no match was found. +@end deftypefun + +@comment string.h +@comment GNU +@deftypefun {void *} rawmemchr (const void *@var{block}, int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Often the @code{memchr} function is used with the knowledge that the +byte @var{c} is available in the memory block specified by the +parameters. But this means that the @var{size} parameter is not really +needed and that the tests performed with it at runtime (to check whether +the end of the block is reached) are not needed. + +The @code{rawmemchr} function exists for just this situation which is +surprisingly frequent. The interface is similar to @code{memchr} except +that the @var{size} parameter is missing. 
The function will look beyond +the end of the block pointed to by @var{block} in case the programmer +made an error in assuming that the byte @var{c} is present in the block. +In this case the result is unspecified. Otherwise the return value is a +pointer to the located byte. + +This function is of special interest when looking for the end of a +string. Since all strings are terminated by a null byte, a call like + +@smallexample + rawmemchr (str, '\0') +@end smallexample + +@noindent +will never go beyond the end of the string. + +This function is a GNU extension. +@end deftypefun + +@comment string.h +@comment GNU +@deftypefun {void *} memrchr (const void *@var{block}, int @var{c}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{memrchr} is like @code{memchr}, except that it searches +backwards from the end of the block defined by @var{block} and @var{size} +(instead of forwards from the front). + +This function is a GNU extension. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {char *} strchr (const char *@var{string}, int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strchr} function finds the first occurrence of the byte +@var{c} (converted to a @code{char}) in the string +beginning at @var{string}. The return value is a pointer to the located +byte, or a null pointer if no match was found. + +For example, +@smallexample +strchr ("hello, world", 'l') + @result{} "llo, world" +strchr ("hello, world", '?') + @result{} NULL +@end smallexample + +The terminating null byte is considered to be part of the string, +so you can use this function to get a pointer to the end of a string by +specifying zero as the value of the @var{c} argument. + +When @code{strchr} returns a null pointer, it does not let you know +the position of the terminating null byte it has found. If you +need that information, it is better (but less portable) to use +@code{strchrnul} than to search for it a second time. 
+@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcschr (const wchar_t *@var{wstring}, wchar_t @var{wc}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wcschr} function finds the first occurrence of the wide +character @var{wc} in the wide string +beginning at @var{wstring}. The return value is a pointer to the +located wide character, or a null pointer if no match was found. + +The terminating null wide character is considered to be part of the wide +string, so you can use this function to get a pointer to the end +of a wide string by specifying a null wide character as the +value of the @var{wc} argument. It would be better (but less portable) +to use @code{wcschrnul} in this case, though. +@end deftypefun + +@comment string.h +@comment GNU +@deftypefun {char *} strchrnul (const char *@var{string}, int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{strchrnul} is the same as @code{strchr} except that if it does +not find the byte, it returns a pointer to the string's terminating +null byte rather than a null pointer. + +This function is a GNU extension. +@end deftypefun + +@comment wchar.h +@comment GNU +@deftypefun {wchar_t *} wcschrnul (const wchar_t *@var{wstring}, wchar_t @var{wc}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{wcschrnul} is the same as @code{wcschr} except that if it does not +find the wide character, it returns a pointer to the wide string's +terminating null wide character rather than a null pointer. + +This function is a GNU extension. +@end deftypefun + +One useful, but unusual, use of the @code{strchr} +function is when one wants to have a pointer pointing to the null byte +terminating a string. This is often written in this way: + +@smallexample + s += strlen (s); +@end smallexample + +@noindent +This is almost optimal but the addition operation duplicates a bit of +the work already done in the @code{strlen} function. 
A better solution +is this: + +@smallexample + s = strchr (s, '\0'); +@end smallexample + +There is no restriction on the second parameter of @code{strchr} so it +could very well also be zero. Those readers thinking very +hard about this might now point out that the @code{strchr} function is +more expensive than the @code{strlen} function since we have two abort +criteria. This is right. But in @theglibc{} the implementation of +@code{strchr} is optimized in a special way so that @code{strchr} +actually is faster. + +@comment string.h +@comment ISO +@deftypefun {char *} strrchr (const char *@var{string}, int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{strrchr} is like @code{strchr}, except that it searches +backwards from the end of the string @var{string} (instead of forwards +from the front). + +For example, +@smallexample +strrchr ("hello, world", 'l') + @result{} "ld" +@end smallexample +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcsrchr (const wchar_t *@var{wstring}, wchar_t @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The function @code{wcsrchr} is like @code{wcschr}, except that it searches +backwards from the end of the string @var{wstring} (instead of forwards +from the front). +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {char *} strstr (const char *@var{haystack}, const char *@var{needle}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is like @code{strchr}, except that it searches @var{haystack} for a +substring @var{needle} rather than just a single byte. It +returns a pointer into the string @var{haystack} that is the first +byte of the substring, or a null pointer if no match was found. If +@var{needle} is an empty string, the function returns @var{haystack}. 
+ +For example, +@smallexample +strstr ("hello, world", "l") + @result{} "llo, world" +strstr ("hello, world", "wo") + @result{} "world" +@end smallexample +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcsstr (const wchar_t *@var{haystack}, const wchar_t *@var{needle}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is like @code{wcschr}, except that it searches @var{haystack} for a +substring @var{needle} rather than just a single wide character. It +returns a pointer into the string @var{haystack} that is the first wide +character of the substring, or a null pointer if no match was found. If +@var{needle} is an empty string, the function returns @var{haystack}. +@end deftypefun + +@comment wchar.h +@comment XPG +@deftypefun {wchar_t *} wcswcs (const wchar_t *@var{haystack}, const wchar_t *@var{needle}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{wcswcs} is a deprecated alias for @code{wcsstr}. This is the +name originally used in the X/Open Portability Guide before the +@w{Amendment 1} to @w{ISO C90} was published. +@end deftypefun + + +@comment string.h +@comment GNU +@deftypefun {char *} strcasestr (const char *@var{haystack}, const char *@var{needle}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c There may be multiple calls of strncasecmp, each accessing the locale +@c object independently. +This is like @code{strstr}, except that it ignores case in searching for +the substring. Like @code{strcasecmp}, it is locale dependent how +uppercase and lowercase characters are related, and arguments are +multibyte strings. 
+ + +For example, +@smallexample +strcasestr ("hello, world", "L") + @result{} "llo, world" +strcasestr ("hello, World", "wo") + @result{} "World" +@end smallexample +@end deftypefun + + +@comment string.h +@comment GNU +@deftypefun {void *} memmem (const void *@var{haystack}, size_t @var{haystack-len},@*const void *@var{needle}, size_t @var{needle-len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is like @code{strstr}, but @var{needle} and @var{haystack} are byte +arrays rather than strings. @var{needle-len} is the +length of @var{needle} and @var{haystack-len} is the length of +@var{haystack}.@refill + +This function is a GNU extension. +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun size_t strspn (const char *@var{string}, const char *@var{skipset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strspn} (``string span'') function returns the length of the +initial substring of @var{string} that consists entirely of bytes that +are members of the set specified by the string @var{skipset}. The order +of the bytes in @var{skipset} is not important. + +For example, +@smallexample +strspn ("hello, world", "abcdefghijklmnopqrstuvwxyz") + @result{} 5 +@end smallexample + +In a multibyte string, characters consisting of +more than one byte are not treated as single entities. Each byte is treated +separately. The function is not locale-dependent. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun size_t wcsspn (const wchar_t *@var{wstring}, const wchar_t *@var{skipset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wcsspn} (``wide character string span'') function returns the +length of the initial substring of @var{wstring} that consists entirely +of wide characters that are members of the set specified by the string +@var{skipset}. The order of the wide characters in @var{skipset} is not +important. 
+@end deftypefun + +@comment string.h +@comment ISO +@deftypefun size_t strcspn (const char *@var{string}, const char *@var{stopset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strcspn} (``string complement span'') function returns the length +of the initial substring of @var{string} that consists entirely of bytes +that are @emph{not} members of the set specified by the string @var{stopset}. +(In other words, it returns the offset of the first byte in @var{string} +that is a member of the set @var{stopset}.) + +For example, +@smallexample +strcspn ("hello, world", " \t\n,.;!?") + @result{} 5 +@end smallexample + +In a multibyte string, characters consisting of +more than one byte are not treated as single entities. Each byte is treated +separately. The function is not locale-dependent. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun size_t wcscspn (const wchar_t *@var{wstring}, const wchar_t *@var{stopset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wcscspn} (``wide character string complement span'') function +returns the length of the initial substring of @var{wstring} that +consists entirely of wide characters that are @emph{not} members of the +set specified by the string @var{stopset}. (In other words, it returns +the offset of the first wide character in @var{wstring} that is a member of +the set @var{stopset}.) +@end deftypefun + +@comment string.h +@comment ISO +@deftypefun {char *} strpbrk (const char *@var{string}, const char *@var{stopset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{strpbrk} (``string pointer break'') function is related to +@code{strcspn}, except that it returns a pointer to the first byte +in @var{string} that is a member of the set @var{stopset} instead of the +length of the initial substring. It returns a null pointer if no such +byte from @var{stopset} is found. + +@c @group Invalid outside the example. 
+For example, + +@smallexample +strpbrk ("hello, world", " \t\n,.;!?") + @result{} ", world" +@end smallexample +@c @end group + +In a multibyte string, characters consisting of +more than one byte are not treated as single entities. Each byte is treated +separately. The function is not locale-dependent. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcspbrk (const wchar_t *@var{wstring}, const wchar_t *@var{stopset}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{wcspbrk} (``wide character string pointer break'') function is +related to @code{wcscspn}, except that it returns a pointer to the first +wide character in @var{wstring} that is a member of the set +@var{stopset} instead of the length of the initial substring. It +returns a null pointer if no such wide character from @var{stopset} is found. +@end deftypefun + + +@subsection Compatibility String Search Functions + +@comment string.h +@comment BSD +@deftypefun {char *} index (const char *@var{string}, int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{index} is another name for @code{strchr}; they are exactly the same. +New code should always use @code{strchr} since this name is defined in +@w{ISO C} while @code{index} is a BSD invention which never was available +on @w{System V} derived systems. +@end deftypefun + +@comment string.h +@comment BSD +@deftypefun {char *} rindex (const char *@var{string}, int @var{c}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{rindex} is another name for @code{strrchr}; they are exactly the same. +New code should always use @code{strrchr} since this name is defined in +@w{ISO C} while @code{rindex} is a BSD invention which never was available +on @w{System V} derived systems. 
+@end deftypefun + +@node Finding Tokens in a String +@section Finding Tokens in a String + +@cindex tokenizing strings +@cindex breaking a string into tokens +@cindex parsing tokens from a string +It's fairly common for programs to have a need to do some simple kinds +of lexical analysis and parsing, such as splitting a command string up +into tokens. You can do this with the @code{strtok} function, declared +in the header file @file{string.h}. +@pindex string.h + +@comment string.h +@comment ISO +@deftypefun {char *} strtok (char *restrict @var{newstring}, const char *restrict @var{delimiters}) +@safety{@prelim{}@mtunsafe{@mtasurace{:strtok}}@asunsafe{}@acsafe{}} +A string can be split into tokens by making a series of calls to the +function @code{strtok}. + +The string to be split up is passed as the @var{newstring} argument on +the first call only. The @code{strtok} function uses this to set up +some internal state information. Subsequent calls to get additional +tokens from the same string are indicated by passing a null pointer as +the @var{newstring} argument. Calling @code{strtok} with another +non-null @var{newstring} argument reinitializes the state information. +It is guaranteed that no other library function ever calls @code{strtok} +behind your back (which would mess up this internal state information). + +The @var{delimiters} argument is a string that specifies a set of delimiters +that may surround the token being extracted. All the initial bytes +that are members of this set are discarded. The first byte that is +@emph{not} a member of this set of delimiters marks the beginning of the +next token. The end of the token is found by looking for the next +byte that is a member of the delimiter set. This byte in the +original string @var{newstring} is overwritten by a null byte, and the +pointer to the beginning of the token in @var{newstring} is returned. 
+ +On the next call to @code{strtok}, the searching begins at the next +byte beyond the one that marked the end of the previous token. +Note that the set of delimiters @var{delimiters} does not have to be the +same on every call in a series of calls to @code{strtok}. + +If the end of the string @var{newstring} is reached, or if the remainder of +the string consists only of delimiter bytes, @code{strtok} returns +a null pointer. + +In a multibyte string, characters consisting of +more than one byte are not treated as single entities. Each byte is treated +separately. The function is not locale-dependent. +@end deftypefun + +@comment wchar.h +@comment ISO +@deftypefun {wchar_t *} wcstok (wchar_t *@var{newstring}, const wchar_t *@var{delimiters}, wchar_t **@var{save_ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +A string can be split into tokens by making a series of calls to the +function @code{wcstok}. + +The string to be split up is passed as the @var{newstring} argument on +the first call only. The @code{wcstok} function uses this to set up +some internal state information. Subsequent calls to get additional +tokens from the same wide string are indicated by passing a +null pointer as the @var{newstring} argument, which causes the pointer +previously stored in @var{save_ptr} to be used instead. + +The @var{delimiters} argument is a wide string that specifies +a set of delimiters that may surround the token being extracted. All +the initial wide characters that are members of this set are discarded. +The first wide character that is @emph{not} a member of this set of +delimiters marks the beginning of the next token. The end of the token +is found by looking for the next wide character that is a member of the +delimiter set. 
This wide character in the original wide +string @var{newstring} is overwritten by a null wide character, the +pointer past the overwritten wide character is saved in @var{save_ptr}, +and the pointer to the beginning of the token in @var{newstring} is +returned. + +On the next call to @code{wcstok}, the searching begins at the next +wide character beyond the one that marked the end of the previous token. +Note that the set of delimiters @var{delimiters} does not have to be the +same on every call in a series of calls to @code{wcstok}. + +If the end of the wide string @var{newstring} is reached, or +if the remainder of the string consists only of delimiter wide characters, +@code{wcstok} returns a null pointer. +@end deftypefun + +@strong{Warning:} Since @code{strtok} and @code{wcstok} alter the string +they are parsing, you should always copy the string to a temporary buffer +before parsing it with @code{strtok}/@code{wcstok} (@pxref{Copying Strings +and Arrays}). If you allow @code{strtok} or @code{wcstok} to modify +a string that came from another part of your program, you are asking for +trouble; that string might be used for other purposes after +@code{strtok} or @code{wcstok} has modified it, and it would not have +the expected value. + +The string that you are operating on might even be a constant. Then +when @code{strtok} or @code{wcstok} tries to modify it, your program +will get a fatal signal for writing in read-only memory. @xref{Program +Error Signals}. Even if the operation of @code{strtok} or @code{wcstok} +would not require a modification of the string (e.g., if there is +exactly one token) the string can (and in the @glibcadj{} case will) be +modified. + +This is a special case of a general principle: if a part of a program +does not have as its purpose the modification of a certain data +structure, then it is error-prone to modify the data structure +temporarily. + +The function @code{strtok} is not reentrant, whereas @code{wcstok} is. 
+@xref{Nonreentrancy}, for a discussion of where and why reentrancy is +important. + +Here is a simple example showing the use of @code{strtok}. + +@comment Yes, this example has been tested. +@smallexample +#include <string.h> +#include <stddef.h> + +@dots{} + +const char string[] = "words separated by spaces -- and, punctuation!"; +const char delimiters[] = " .,;:!-"; +char *token, *cp; + +@dots{} + +cp = strdupa (string); /* Make writable copy. */ +token = strtok (cp, delimiters); /* token => "words" */ +token = strtok (NULL, delimiters); /* token => "separated" */ +token = strtok (NULL, delimiters); /* token => "by" */ +token = strtok (NULL, delimiters); /* token => "spaces" */ +token = strtok (NULL, delimiters); /* token => "and" */ +token = strtok (NULL, delimiters); /* token => "punctuation" */ +token = strtok (NULL, delimiters); /* token => NULL */ +@end smallexample + +@Theglibc{} contains two more functions for tokenizing a string +which overcome the limitation of non-reentrancy. They are not +available for wide strings. + +@comment string.h +@comment POSIX +@deftypefun {char *} strtok_r (char *@var{newstring}, const char *@var{delimiters}, char **@var{save_ptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Just like @code{strtok}, this function splits the string into several +tokens which can be accessed by successive calls to @code{strtok_r}. +The difference is that, as in @code{wcstok}, the information about the +next token is stored in the space pointed to by the third argument, +@var{save_ptr}, which is a pointer to a string pointer. Calling +@code{strtok_r} with a null pointer for @var{newstring} and leaving +@var{save_ptr} between the calls unchanged does the job without +hindering reentrancy. + +This function is defined in POSIX.1 and can be found on many systems +which support multi-threading. 
+@end deftypefun + +@comment string.h +@comment BSD +@deftypefun {char *} strsep (char **@var{string_ptr}, const char *@var{delimiter}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function has functionality similar to @code{strtok_r} with the +@var{newstring} argument replaced by the @var{save_ptr} argument. The +initialization of the moving pointer has to be done by the user. +Successive calls to @code{strsep} move the pointer along the tokens +separated by @var{delimiter}, returning the address of the next token +and updating @var{string_ptr} to point to the beginning of the next +token. + +One difference between @code{strsep} and @code{strtok_r} is that if the +input string contains more than one byte from @var{delimiter} in a +row, @code{strsep} returns an empty string for each pair of bytes +from @var{delimiter}. This means that a program normally should test +for @code{strsep} returning an empty string before processing it. + +This function was introduced in 4.3BSD and therefore is widely available. +@end deftypefun + +Here is what the above example looks like when @code{strsep} is used. + +@comment Yes, this example has been tested. 
+@smallexample +#include <string.h> +#include <stddef.h> + +@dots{} + +const char string[] = "words separated by spaces -- and, punctuation!"; +const char delimiters[] = " .,;:!-"; +char *running; +char *token; + +@dots{} + +running = strdupa (string); +token = strsep (&running, delimiters); /* token => "words" */ +token = strsep (&running, delimiters); /* token => "separated" */ +token = strsep (&running, delimiters); /* token => "by" */ +token = strsep (&running, delimiters); /* token => "spaces" */ +token = strsep (&running, delimiters); /* token => "" */ +token = strsep (&running, delimiters); /* token => "" */ +token = strsep (&running, delimiters); /* token => "" */ +token = strsep (&running, delimiters); /* token => "and" */ +token = strsep (&running, delimiters); /* token => "" */ +token = strsep (&running, delimiters); /* token => "punctuation" */ +token = strsep (&running, delimiters); /* token => "" */ +token = strsep (&running, delimiters); /* token => NULL */ +@end smallexample + +@comment string.h +@comment GNU +@deftypefun {char *} basename (const char *@var{filename}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The GNU version of the @code{basename} function returns the last +component of the path in @var{filename}. This function is the preferred +usage, since it does not modify the argument, @var{filename}, and +respects trailing slashes. The prototype for @code{basename} can be +found in @file{string.h}. Note, this function is overridden by the XPG +version, if @file{libgen.h} is included. + +Example of using GNU @code{basename}: + +@smallexample +#include <string.h> + +int +main (int argc, char *argv[]) +@{ + char *prog = basename (argv[0]); + + if (argc < 2) + @{ + fprintf (stderr, "Usage %s <arg>\n", prog); + exit (1); + @} + + @dots{} +@} +@end smallexample + +@strong{Portability Note:} This function may produce different results +on different systems. 
+ +@end deftypefun + +@comment libgen.h +@comment XPG +@deftypefun {char *} basename (char *@var{path}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This is the standard XPG defined @code{basename}. It is similar in +spirit to the GNU version, but may modify the @var{path} by removing +trailing '/' bytes. If the @var{path} is made up entirely of '/' +bytes, then "/" will be returned. Also, if @var{path} is +@code{NULL} or an empty string, then "." is returned. The prototype for +the XPG version can be found in @file{libgen.h}. + +Example of using XPG @code{basename}: + +@smallexample +#include <libgen.h> + +int +main (int argc, char *argv[]) +@{ + char *prog; + char *path = strdupa (argv[0]); + + prog = basename (path); + + if (argc < 2) + @{ + fprintf (stderr, "Usage %s <arg>\n", prog); + exit (1); + @} + + @dots{} + +@} +@end smallexample +@end deftypefun + +@comment libgen.h +@comment XPG +@deftypefun {char *} dirname (char *@var{path}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{dirname} function is the complement to the XPG version of +@code{basename}. It returns the parent directory of the file specified +by @var{path}. If @var{path} is @code{NULL}, an empty string, or +contains no '/' bytes, then "." is returned. The prototype for this +function can be found in @file{libgen.h}. +@end deftypefun + +@node Erasing Sensitive Data +@section Erasing Sensitive Data + +Sensitive data, such as cryptographic keys, should be erased from +memory after use, to reduce the risk that a bug will expose it to the +outside world. However, compiler optimizations may determine that an +erasure operation is ``unnecessary,'' and remove it from the generated +code, because no @emph{correct} program could access the variable or +heap object containing the sensitive data after it's deallocated. +Since erasure is a precaution against bugs, this optimization is +inappropriate. 
+ +The function @code{explicit_bzero} erases a block of memory, and +guarantees that the compiler will not remove the erasure as +``unnecessary.'' + +@smallexample +@group +#include <string.h> + +extern void encrypt (const char *key, const char *in, + char *out, size_t n); +extern void genkey (const char *phrase, char *key); + +void encrypt_with_phrase (const char *phrase, const char *in, + char *out, size_t n) +@{ + char key[16]; + genkey (phrase, key); + encrypt (key, in, out, n); + explicit_bzero (key, 16); +@} +@end group +@end smallexample + +@noindent +In this example, if @code{memset}, @code{bzero}, or a hand-written +loop had been used, the compiler might remove them as ``unnecessary.'' + +@strong{Warning:} @code{explicit_bzero} does not guarantee that +sensitive data is @emph{completely} erased from the computer's memory. +There may be copies in temporary storage areas, such as registers and +``scratch'' stack space; since these are invisible to the source code, +a library function cannot erase them. + +Also, @code{explicit_bzero} only operates on RAM. If a sensitive data +object never needs to have its address taken other than to call +@code{explicit_bzero}, it might be stored entirely in CPU registers +@emph{until} the call to @code{explicit_bzero}. Then it will be +copied into RAM, the copy will be erased, and the original will remain +intact. Data in RAM is more likely to be exposed by a bug than data +in registers, so this creates a brief window where the data is at +greater risk of exposure than it would have been if the program didn't +try to erase it at all. + +Declaring sensitive variables as @code{volatile} will make both the +above problems @emph{worse}; a @code{volatile} variable will be stored +in memory for its entire lifetime, and the compiler will make +@emph{more} copies of it than it would otherwise have. 
Attempting to +erase a normal variable ``by hand'' through a +@code{volatile}-qualified pointer doesn't work at all---because the +variable itself is not @code{volatile}, some compilers will ignore the +qualification on the pointer and remove the erasure anyway. + +Having said all that, in most situations, using @code{explicit_bzero} +is better than not using it. At present, the only way to do a more +thorough job is to write the entire sensitive operation in assembly +language. We anticipate that future compilers will recognize calls to +@code{explicit_bzero} and take appropriate steps to erase all the +copies of the affected data, wherever they may be. + +@comment string.h +@comment BSD +@deftypefun void explicit_bzero (void *@var{block}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{explicit_bzero} writes zero into @var{len} bytes of memory +beginning at @var{block}, just as @code{bzero} would. The zeroes are +always written, even if the compiler could determine that this is +``unnecessary'' because no correct program could read them back. + +@strong{Note:} The @emph{only} optimization that @code{explicit_bzero} +disables is removal of ``unnecessary'' writes to memory. The compiler +can perform all the other optimizations that it could for a call to +@code{memset}. For instance, it may replace the function call with +inline memory writes, and it may assume that @var{block} cannot be a +null pointer. + +@strong{Portability Note:} This function first appeared in OpenBSD 5.5 +and has not been standardized. Other systems may provide the same +functionality under a different name, such as @code{explicit_memset}, +@code{memset_s}, or @code{SecureZeroMemory}. + +@Theglibc{} declares this function in @file{string.h}, but on other +systems it may be in @file{strings.h} instead. 
+@end deftypefun + +@node strfry +@section strfry + +The function below addresses the perennial programming quandary: ``How do +I take good data in string form and painlessly turn it into garbage?'' +This is actually a fairly simple task for C programmers who do not use +@theglibc{} string functions, but for programs based on @theglibc{}, +the @code{strfry} function is the preferred method for +destroying string data. + +The prototype for this function is in @file{string.h}. + +@comment string.h +@comment GNU +@deftypefun {char *} strfry (char *@var{string}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Calls initstate_r, time, getpid, strlen, and random_r. + +@code{strfry} creates a pseudorandom anagram of a string, replacing the +input with the anagram in place. For each position in the string, +@code{strfry} swaps it with a position in the string selected at random +(from a uniform distribution). The two positions may be the same. + +The return value of @code{strfry} is always @var{string}. + +@strong{Portability Note:} This function is unique to @theglibc{}. + +@end deftypefun + + +@node Trivial Encryption +@section Trivial Encryption +@cindex encryption + + +The @code{memfrob} function converts an array of data to something +unrecognizable and back again. It is not encryption in its usual sense +since it is easy for someone to convert the encrypted data back to clear +text. The transformation is analogous to Usenet's ``Rot13'' encryption +method for obscuring offensive jokes from sensitive eyes and such. +Unlike Rot13, @code{memfrob} works on arbitrary binary data, not just +text. +@cindex Rot13 + +For true encryption, see @ref{Cryptographic Functions}. + +This function is declared in @file{string.h}. 
+@pindex string.h + +@comment string.h +@comment GNU +@deftypefun {void *} memfrob (void *@var{mem}, size_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} + +@code{memfrob} transforms (frobnicates) each byte of the data structure +at @var{mem}, which is @var{length} bytes long, by bitwise exclusive +oring it with binary 00101010. It does the transformation in place and +its return value is always @var{mem}. + +Note that applying @code{memfrob} a second time on the same data structure +returns it to its original state. + +This is a good function for hiding information from someone who doesn't +want to see it or doesn't want to see it very much. To really prevent +people from retrieving the information, use stronger encryption such as +that described in @ref{Cryptographic Functions}. + +@strong{Portability Note:} This function is unique to @theglibc{}. + +@end deftypefun + +@node Encode Binary Data +@section Encode Binary Data + +To store or transfer binary data in environments which only support text +one has to encode the binary data by mapping the input bytes to +bytes in the range allowed for storing or transferring. SVID +systems (and nowadays XPG compliant systems) provide minimal support for +this task. + +@comment stdlib.h +@comment XPG +@deftypefun {char *} l64a (long int @var{n}) +@safety{@prelim{}@mtunsafe{@mtasurace{:l64a}}@asunsafe{}@acsafe{}} +This function encodes a 32-bit input value using bytes from the +basic character set. It returns a pointer to a 7 byte buffer which +contains an encoded version of @var{n}. To encode a series of bytes the +user must copy the returned string to a destination buffer. It returns +the empty string if @var{n} is zero, which is somewhat bizarre but +mandated by the standard.@* +@strong{Warning:} Since a static buffer is used this function should not +be used in multi-threaded programs. 
There is no thread-safe alternative +to this function in the C library.@* +@strong{Compatibility Note:} The XPG standard states that the return +value of @code{l64a} is undefined if @var{n} is negative. In the GNU +implementation, @code{l64a} treats its argument as unsigned, so it will +return a sensible encoding for any nonzero @var{n}; however, portable +programs should not rely on this. + +To encode a large buffer @code{l64a} must be called in a loop, once for +each 32-bit word of the buffer. For example, one could do something +like this: + +@smallexample +char * +encode (const void *buf, size_t len) +@{ + /* @r{We know in advance how long the buffer has to be.} */ + unsigned char *in = (unsigned char *) buf; + char *out = malloc (6 + ((len + 3) / 4) * 6 + 1); + char *cp = out, *p; + + /* @r{Encode the length.} */ + /* @r{Using `htonl' is necessary so that the data can be} + @r{decoded even on machines with different byte order.} + @r{`l64a' can return a string shorter than 6 bytes, so } + @r{we pad it with encoding of 0 (}'.'@r{) at the end by } + @r{hand.} */ + + p = stpcpy (cp, l64a (htonl (len))); + cp = mempcpy (p, "......", 6 - (p - cp)); + + while (len > 3) + @{ + unsigned long int n = *in++; + n = (n << 8) | *in++; + n = (n << 8) | *in++; + n = (n << 8) | *in++; + len -= 4; + p = stpcpy (cp, l64a (htonl (n))); + cp = mempcpy (p, "......", 6 - (p - cp)); + @} + if (len > 0) + @{ + unsigned long int n = *in++; + if (--len > 0) + @{ + n = (n << 8) | *in++; + if (--len > 0) + n = (n << 8) | *in; + @} + cp = stpcpy (cp, l64a (htonl (n))); + @} + *cp = '\0'; + return out; +@} +@end smallexample + +It is strange that the library does not provide the complete +functionality needed but so be it. + +@end deftypefun + +To decode data produced with @code{l64a} the following function should be +used. 
+ +@comment stdlib.h +@comment XPG +@deftypefun {long int} a64l (const char *@var{string}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The parameter @var{string} should contain a string which was produced by +a call to @code{l64a}. The function processes at most 6 bytes of +this string, and decodes the bytes it finds according to the table +below. It stops decoding when it finds a byte not in the table, +rather like @code{atoi}; if you have a buffer which has been broken into +lines, you must be careful to skip over the end-of-line bytes. + +The decoded number is returned as a @code{long int} value. +@end deftypefun + +The @code{l64a} and @code{a64l} functions use a base 64 encoding, in +which each byte of an encoded string represents six bits of an +input word. These symbols are used for the base 64 digits: + +@multitable {xxxxx} {xxx} {xxx} {xxx} {xxx} {xxx} {xxx} {xxx} {xxx} +@item @tab 0 @tab 1 @tab 2 @tab 3 @tab 4 @tab 5 @tab 6 @tab 7 +@item 0 @tab @code{.} @tab @code{/} @tab @code{0} @tab @code{1} + @tab @code{2} @tab @code{3} @tab @code{4} @tab @code{5} +@item 8 @tab @code{6} @tab @code{7} @tab @code{8} @tab @code{9} + @tab @code{A} @tab @code{B} @tab @code{C} @tab @code{D} +@item 16 @tab @code{E} @tab @code{F} @tab @code{G} @tab @code{H} + @tab @code{I} @tab @code{J} @tab @code{K} @tab @code{L} +@item 24 @tab @code{M} @tab @code{N} @tab @code{O} @tab @code{P} + @tab @code{Q} @tab @code{R} @tab @code{S} @tab @code{T} +@item 32 @tab @code{U} @tab @code{V} @tab @code{W} @tab @code{X} + @tab @code{Y} @tab @code{Z} @tab @code{a} @tab @code{b} +@item 40 @tab @code{c} @tab @code{d} @tab @code{e} @tab @code{f} + @tab @code{g} @tab @code{h} @tab @code{i} @tab @code{j} +@item 48 @tab @code{k} @tab @code{l} @tab @code{m} @tab @code{n} + @tab @code{o} @tab @code{p} @tab @code{q} @tab @code{r} +@item 56 @tab @code{s} @tab @code{t} @tab @code{u} @tab @code{v} + @tab @code{w} @tab @code{x} @tab @code{y} @tab @code{z} +@end multitable + +This encoding scheme is not 
standard. There are some other encoding +methods which are much more widely used (UU encoding, MIME encoding). +Generally, it is better to use one of these encodings. + +@node Argz and Envz Vectors +@section Argz and Envz Vectors + +@cindex argz vectors (string vectors) +@cindex string vectors, null-byte separated +@cindex argument vectors, null-byte separated +@dfn{argz vectors} are vectors of strings in a contiguous block of +memory, each element separated from its neighbors by null bytes +(@code{'\0'}). + +@cindex envz vectors (environment vectors) +@cindex environment vectors, null-byte separated +@dfn{Envz vectors} are an extension of argz vectors where each element is a +name-value pair, separated by a @code{'='} byte (as in a Unix +environment). + +@menu +* Argz Functions:: Operations on argz vectors. +* Envz Functions:: Additional operations on environment vectors. +@end menu + +@node Argz Functions, Envz Functions, , Argz and Envz Vectors +@subsection Argz Functions + +Each argz vector is represented by a pointer to the first element, of +type @code{char *}, and a size, of type @code{size_t}, both of which can +be initialized to @code{0} to represent an empty argz vector. All argz +functions accept either a pointer and a size argument, or pointers to +them, if they will be modified. + +The argz functions use @code{malloc}/@code{realloc} to allocate/grow +argz vectors, and so any argz vector created using these functions may +be freed by using @code{free}; conversely, any argz function that may +grow a string expects that string to have been allocated using +@code{malloc} (those argz functions that only examine their arguments or +modify them in place will work on any sort of memory). +@xref{Unconstrained Allocation}. + +All argz functions that do memory allocation have a return type of +@code{error_t}, and return @code{0} for success, and @code{ENOMEM} if an +allocation error occurs. 
+ +@pindex argz.h +These functions are declared in the standard include file @file{argz.h}. + +@comment argz.h +@comment GNU +@deftypefun {error_t} argz_create (char *const @var{argv}[], char **@var{argz}, size_t *@var{argz_len}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{argz_create} function converts the Unix-style argument vector +@var{argv} (a vector of pointers to normal C strings, terminated by +@code{(char *)0}; @pxref{Program Arguments}) into an argz vector with +the same elements, which is returned in @var{argz} and @var{argz_len}. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {error_t} argz_create_sep (const char *@var{string}, int @var{sep}, char **@var{argz}, size_t *@var{argz_len}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{argz_create_sep} function converts the string +@var{string} into an argz vector (returned in @var{argz} and +@var{argz_len}) by splitting it into elements at every occurrence of the +byte @var{sep}. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {size_t} argz_count (const char *@var{argz}, size_t @var{argz_len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +Returns the number of elements in the argz vector @var{argz} and +@var{argz_len}. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {void} argz_extract (const char *@var{argz}, size_t @var{argz_len}, char **@var{argv}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{argz_extract} function converts the argz vector @var{argz} and +@var{argz_len} into a Unix-style argument vector stored in @var{argv}, +by putting pointers to every element in @var{argz} into successive +positions in @var{argv}, followed by a terminator of @code{0}. 
+@var{Argv} must be pre-allocated with enough space to hold all the +elements in @var{argz} plus the terminating @code{(char *)0} +(@code{(argz_count (@var{argz}, @var{argz_len}) + 1) * sizeof (char *)} +bytes should be enough). Note that the string pointers stored into +@var{argv} point into @var{argz}---they are not copies---and so +@var{argz} must be copied if it will be changed while @var{argv} is +still active. This function is useful for passing the elements in +@var{argz} to an exec function (@pxref{Executing a File}). +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {void} argz_stringify (char *@var{argz}, size_t @var{len}, int @var{sep}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{argz_stringify} function converts @var{argz} into a normal string with +the elements separated by the byte @var{sep}, by replacing each +@code{'\0'} inside @var{argz} (except the last one, which terminates the +string) with @var{sep}. This is handy for printing @var{argz} in a +readable manner. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {error_t} argz_add (char **@var{argz}, size_t *@var{argz_len}, const char *@var{str}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Calls strlen and argz_append. +The @code{argz_add} function adds the string @var{str} to the end of the +argz vector @code{*@var{argz}}, and updates @code{*@var{argz}} and +@code{*@var{argz_len}} accordingly. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {error_t} argz_add_sep (char **@var{argz}, size_t *@var{argz_len}, const char *@var{str}, int @var{delim}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{argz_add_sep} function is similar to @code{argz_add}, but +@var{str} is split into separate elements in the result at occurrences of +the byte @var{delim}. 
This is useful, for instance, for +adding the components of a Unix search path to an argz vector, by using +a value of @code{':'} for @var{delim}. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {error_t} argz_append (char **@var{argz}, size_t *@var{argz_len}, const char *@var{buf}, size_t @var{buf_len}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{argz_append} function appends @var{buf_len} bytes starting at +@var{buf} to the argz vector @code{*@var{argz}}, reallocating +@code{*@var{argz}} to accommodate it, and adding @var{buf_len} to +@code{*@var{argz_len}}. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {void} argz_delete (char **@var{argz}, size_t *@var{argz_len}, char *@var{entry}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Calls free if no argument is left. +If @var{entry} points to the beginning of one of the elements in the +argz vector @code{*@var{argz}}, the @code{argz_delete} function will +remove this entry and reallocate @code{*@var{argz}}, modifying +@code{*@var{argz}} and @code{*@var{argz_len}} accordingly. Note that as +destructive argz functions usually reallocate their argz argument, +pointers into argz vectors such as @var{entry} will then become invalid. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {error_t} argz_insert (char **@var{argz}, size_t *@var{argz_len}, char *@var{before}, const char *@var{entry}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Calls argz_add or realloc and memmove. +The @code{argz_insert} function inserts the string @var{entry} into the +argz vector @code{*@var{argz}} at a point just before the existing +element pointed to by @var{before}, reallocating @code{*@var{argz}} and +updating @code{*@var{argz}} and @code{*@var{argz_len}}. If @var{before} +is @code{0}, @var{entry} is added to the end instead (as if by +@code{argz_add}). 
Since the first element is in fact the same as +@code{*@var{argz}}, passing in @code{*@var{argz}} as the value of +@var{before} will result in @var{entry} being inserted at the beginning. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun {char *} argz_next (const char *@var{argz}, size_t @var{argz_len}, const char *@var{entry}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{argz_next} function provides a convenient way of iterating +over the elements in the argz vector @var{argz}. It returns a pointer +to the next element in @var{argz} after the element @var{entry}, or +@code{0} if there are no elements following @var{entry}. If @var{entry} +is @code{0}, the first element of @var{argz} is returned. + +This behavior suggests two styles of iteration: + +@smallexample + char *entry = 0; + while ((entry = argz_next (@var{argz}, @var{argz_len}, entry))) + @var{action}; +@end smallexample + +(the double parentheses are necessary to make some C compilers shut up +about what they consider a questionable @code{while}-test) and: + +@smallexample + char *entry; + for (entry = @var{argz}; + entry; + entry = argz_next (@var{argz}, @var{argz_len}, entry)) + @var{action}; +@end smallexample + +Note that the latter depends on @var{argz} having a value of @code{0} if +it is empty (rather than a pointer to an empty block of memory); this +invariant is maintained for argz vectors created by the functions here. +@end deftypefun + +@comment argz.h +@comment GNU +@deftypefun error_t argz_replace (@w{char **@var{argz}, size_t *@var{argz_len}}, @w{const char *@var{str}, const char *@var{with}}, @w{unsigned *@var{replace_count}}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +Replace any occurrences of the string @var{str} in @var{argz} with +@var{with}, reallocating @var{argz} as necessary. If +@var{replace_count} is non-zero, @code{*@var{replace_count}} will be +incremented by the number of replacements performed. 
+@end deftypefun + +@node Envz Functions, , Argz Functions, Argz and Envz Vectors +@subsection Envz Functions + +Envz vectors are just argz vectors with additional constraints on the form +of each element; as such, argz functions can also be used on them, where it +makes sense. + +Each element in an envz vector is a name-value pair, separated by a @code{'='} +byte; if multiple @code{'='} bytes are present in an element, those +after the first are considered part of the value, and treated like all other +non-@code{'\0'} bytes. + +If @emph{no} @code{'='} bytes are present in an element, that element is +considered the name of a ``null'' entry, as distinct from an entry with an +empty value: @code{envz_get} will return @code{0} if given the name of a null +entry, whereas an entry with an empty value would result in a value of +@code{""}; @code{envz_entry} will still find such entries, however. Null +entries can be removed with the @code{envz_strip} function. + +As with argz functions, envz functions that may allocate memory (and thus +fail) have a return type of @code{error_t}, and return either @code{0} or +@code{ENOMEM}. + +@pindex envz.h +These functions are declared in the standard include file @file{envz.h}. + +@comment envz.h +@comment GNU +@deftypefun {char *} envz_entry (const char *@var{envz}, size_t @var{envz_len}, const char *@var{name}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{envz_entry} function finds the entry in @var{envz} with the name +@var{name}, and returns a pointer to the whole entry---that is, the argz +element which begins with @var{name} followed by a @code{'='} byte. If +there is no entry with that name, @code{0} is returned. 
+@end deftypefun + +@comment envz.h +@comment GNU +@deftypefun {char *} envz_get (const char *@var{envz}, size_t @var{envz_len}, const char *@var{name}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{envz_get} function finds the entry in @var{envz} with the name +@var{name} (like @code{envz_entry}), and returns a pointer to the value +portion of that entry (following the @code{'='}). If there is no entry with +that name (or only a null entry), @code{0} is returned. +@end deftypefun + +@comment envz.h +@comment GNU +@deftypefun {error_t} envz_add (char **@var{envz}, size_t *@var{envz_len}, const char *@var{name}, const char *@var{value}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +@c Calls envz_remove, which calls enz_entry and argz_delete, and then +@c argz_add or equivalent code that reallocs and appends name=value. +The @code{envz_add} function adds an entry to @code{*@var{envz}} +(updating @code{*@var{envz}} and @code{*@var{envz_len}}) with the name +@var{name}, and value @var{value}. If an entry with the same name +already exists in @var{envz}, it is removed first. If @var{value} is +@code{0}, then the new entry will be the special null type of entry +(mentioned above). +@end deftypefun + +@comment envz.h +@comment GNU +@deftypefun {error_t} envz_merge (char **@var{envz}, size_t *@var{envz_len}, const char *@var{envz2}, size_t @var{envz2_len}, int @var{override}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{envz_merge} function adds each entry in @var{envz2} to @var{envz}, +as if with @code{envz_add}, updating @code{*@var{envz}} and +@code{*@var{envz_len}}. If @var{override} is true, then values in @var{envz2} +will supersede those with the same name in @var{envz}, otherwise not. 
+ +Null entries are treated just like other entries in this respect, so a null +entry in @var{envz} can prevent an entry of the same name in @var{envz2} from +being added to @var{envz}, if @var{override} is false. +@end deftypefun + +@comment envz.h +@comment GNU +@deftypefun {void} envz_strip (char **@var{envz}, size_t *@var{envz_len}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{envz_strip} function removes any null entries from @var{envz}, +updating @code{*@var{envz}} and @code{*@var{envz_len}}. +@end deftypefun + +@comment envz.h +@comment GNU +@deftypefun {void} envz_remove (char **@var{envz}, size_t *@var{envz_len}, const char *@var{name}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{}}} +The @code{envz_remove} function removes an entry named @var{name} from +@var{envz}, updating @code{*@var{envz}} and @code{*@var{envz_len}}. +@end deftypefun + +@c FIXME this are undocumented: +@c strcasecmp_l @safety{@mtsafe{}@assafe{}@acsafe{}} see strcasecmp diff --git a/REORG.TODO/manual/summary.awk b/REORG.TODO/manual/summary.awk new file mode 100644 index 0000000000..1defe616f7 --- /dev/null +++ b/REORG.TODO/manual/summary.awk @@ -0,0 +1,133 @@ +# awk script to create summary.texinfo from the library texinfo files. +# Copyright (C) 1992-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# This script recognizes sequences that look like: +# @comment HEADER.h +# @comment STANDARD +# @def... ITEM | @item ITEM | @vindex ITEM +# +# For each such sequence it prints one summary record holding the item's +# name, its definition text, the header (unless it was given as "(none)"), +# the standard, and an @ref to the most recently seen @node. +# +# nameword[CMD] is the 1-based position, among the words that follow CMD, +# of the word taken as the item's name. firstword[CMD] is the position of +# the first word copied into the printed definition. A brace-enclosed +# group counts as a single word. +# +# header is 0 while no sequence is pending, -1 when the header was given +# as "(none)", and otherwise the formatted @file{...} list. + +BEGIN { header = 0; +nameword["@defun"]=1 +nameword["@defunx"]=1 +nameword["@defmac"]=1 +nameword["@defmacx"]=1 +nameword["@defspec"]=1 +nameword["@defspecx"]=1 +nameword["@defvar"]=1 +nameword["@defvarx"]=1 +nameword["@defopt"]=1 +nameword["@defoptx"]=1 +nameword["@deffn"]=2 +nameword["@deffnx"]=2 +nameword["@defvr"]=2 +nameword["@defvrx"]=2 +nameword["@deftp"]=2 +nameword["@deftpx"]=2 +nameword["@deftypefun"]=2 +nameword["@deftypefunx"]=2 +nameword["@deftypevar"]=2 +nameword["@deftypevarx"]=2 +nameword["@deftypefn"]=3 +nameword["@deftypefnx"]=3 +nameword["@deftypevr"]=3 +nameword["@deftypevrx"]=3 +firstword["@defun"]=1 +firstword["@defunx"]=1 +firstword["@defmac"]=1 +firstword["@defmacx"]=1 +firstword["@defspec"]=1 +firstword["@defspecx"]=1 +firstword["@defvar"]=1 +firstword["@defvarx"]=1 +firstword["@defopt"]=1 +firstword["@defoptx"]=1 +firstword["@deffn"]=2 +firstword["@deffnx"]=2 +firstword["@defvr"]=2 +firstword["@defvrx"]=2 +firstword["@deftp"]=2 +firstword["@deftpx"]=2 +firstword["@deftypefun"]=1 +firstword["@deftypefunx"]=1 +firstword["@deftypevar"]=1 +firstword["@deftypevarx"]=1 +firstword["@deftypefn"]=2 +firstword["@deftypefnx"]=2 +firstword["@deftypevr"]=2 +firstword["@deftypevrx"]=2 +nameword["@item"]=1 +firstword["@item"]=1 +nameword["@itemx"]=1 +firstword["@itemx"]=1 +nameword["@vindex"]=1 +firstword["@vindex"]=1 + +print "@c DO NOT EDIT THIS FILE!" +print "@c This file is generated by summary.awk from the Texinfo sources." 
+} + +# Remember the current node name: $2 plus the following fields, up to and +# including the first of those that contains a comma, with trailing commas +# and spaces then stripped. +# NOTE(review): $2 itself is never tested for a comma, so a one-word node +# name followed by node pointers also picks up the next field -- confirm +# whether that is intended. +$1 == "@node" { node=$2; + for (i = 3; i <= NF; ++i) + { node=node " " $i; if ( $i ~ /,/ ) break; } + sub (/,[, ]*$/, "", node); + } + +# "@comment FILE.h ..." starts a sequence: every word on the line is wrapped +# in @file{} and the results are joined with ", ". +$1 == "@comment" && $2 ~ /\.h$/ { header="@file{" $2 "}"; + for (i = 3; i <= NF; ++i) + header=header ", @file{" $i "}" + } + +# "@comment (none)" starts a sequence for an item that has no header. +$1 == "@comment" && $2 == "(none)" { header = -1; } + +# While a sequence is pending, the text of each @comment line becomes the +# standard. The header line itself matches this rule too; the +# "@comment STANDARD" line that follows then overwrites the value. +$1 == "@comment" && header != 0 { std=$2; + for (i=3;i<=NF;++i) std=std " " $i } + +# A defining line ends the pending sequence: split its arguments into words +# (joining the pieces of a multi-word brace group), pick out the name and the +# definition text using the tables above, print the record with form feeds +# (character 12) between its parts, and reset header to 0. +header != 0 && $1 ~ /@def|@item|@vindex/ \ + { defn=""; name=""; curly=0; n=1; + for (i = 2; i <= NF; ++i) { + if ($i ~ /^{/ && $i !~ /}/) { + curly=1 + word=substr ($i, 2, length ($i)) + } + else { + if (curly) { + if ($i ~ /}$/) { + curly=0 + word=word " " substr ($i, 1, length ($i) - 1) + } else + word=word " " $i + } + # Handle a single word in braces. + else if ($i ~ /^{.*}$/) + word=substr ($i, 2, length ($i) - 2) + else + word=$i + if (!curly) { + if (n >= firstword[$1]) + defn=defn " " word + if (n == nameword[$1]) + name=word + ++n + } + } + } + printf "@comment %s%c", name, 12 # FF + printf "@item%s%c%c", defn, 12, 12 + if (header != -1) printf "%s ", header; + printf "(%s): @ref{%s}.%c\n", std, node, 12; + header = 0 } diff --git a/REORG.TODO/manual/sysinfo.texi b/REORG.TODO/manual/sysinfo.texi new file mode 100644 index 0000000000..9a8b79d66b --- /dev/null +++ b/REORG.TODO/manual/sysinfo.texi @@ -0,0 +1,1275 @@ +@node System Management, System Configuration, Users and Groups, Top +@c %MENU% Controlling the system and getting information about it +@chapter System Management + +This chapter describes facilities for controlling the system that +underlies a process (including the operating system and hardware) and +for getting information about it. Anyone can generally use the +informational facilities, but usually only a properly privileged process +can make changes. + + +@menu +* Host Identification:: Determining the name of the machine. 
+* Platform Type:: Determining operating system and basic + machine type +* Filesystem Handling:: Controlling/querying mounts +* System Parameters:: Getting and setting various system parameters +@end menu + +To get information on parameters of the system that are built into the +system, such as the maximum length of a filename, see @ref{System +Configuration}. + +@node Host Identification +@section Host Identification + +This section explains how to identify the particular system on which your +program is running. First, let's review the various ways computer systems +are named, which is a little complicated because of the history of the +development of the Internet. + +Every Unix system (also known as a host) has a host name, whether it's +connected to a network or not. In its simplest form, as used before +computer networks were an issue, it's just a word like @samp{chicken}. +@cindex host name + +But any system attached to the Internet or any network like it conforms +to a more rigorous naming convention as part of the Domain Name System +(DNS). In the DNS, every host name is composed of two parts: +@cindex DNS +@cindex Domain Name System + +@enumerate +@item +hostname +@cindex hostname +@item +domain name +@cindex domain name +@end enumerate + +You will note that ``hostname'' looks a lot like ``host name'', but is +not the same thing, and that people often incorrectly refer to entire +host names as ``domain names.'' + +In the DNS, the full host name is properly called the FQDN (Fully Qualified +Domain Name) and consists of the hostname, then a period, then the +domain name. The domain name itself usually has multiple components +separated by periods. So for example, a system's hostname may be +@samp{chicken} and its domain name might be @samp{ai.mit.edu}, so +its FQDN (which is its host name) is @samp{chicken.ai.mit.edu}. +@cindex FQDN + +Adding to the confusion, though, is that the DNS is not the only name space +in which a computer needs to be known. 
Another name space is the +NIS (aka YP) name space. For NIS purposes, there is another domain +name, which is called the NIS domain name or the YP domain name. It +need not have anything to do with the DNS domain name. +@cindex YP +@cindex NIS +@cindex NIS domain name +@cindex YP domain name + +Confusing things even more is the fact that in the DNS, it is possible for +multiple FQDNs to refer to the same system. However, there is always +exactly one of them that is the true host name, and it is called the +canonical FQDN. + +In some contexts, the host name is called a ``node name.'' + +For more information on DNS host naming, see @ref{Host Names}. + +@pindex hostname +@pindex hostid +@pindex unistd.h +Prototypes for these functions appear in @file{unistd.h}. + +The programs @code{hostname}, @code{hostid}, and @code{domainname} work +by calling these functions. + +@comment unistd.h +@comment BSD +@deftypefun int gethostname (char *@var{name}, size_t @var{size}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on unix; implemented in terms of uname on posix and of +@c hurd_get_host_config on hurd. +This function returns the host name of the system on which it is called, +in the array @var{name}. The @var{size} argument specifies the size of +this array, in bytes. Note that this is @emph{not} the DNS hostname. +If the system participates in the DNS, this is the FQDN (see above). + +The return value is @code{0} on success and @code{-1} on failure. In +@theglibc{}, @code{gethostname} fails if @var{size} is not large +enough; then you can try again with a larger array. The following +@code{errno} error condition is defined for this function: + +@table @code +@item ENAMETOOLONG +The @var{size} argument is less than the size of the host name plus one. +@end table + +@pindex sys/param.h +On some systems, there is a symbol for the maximum possible host name +length: @code{MAXHOSTNAMELEN}. It is defined in @file{sys/param.h}. 
+But you can't count on this to exist, so it is cleaner to handle +failure and try again. + +@code{gethostname} stores the beginning of the host name in @var{name} +even if the host name won't entirely fit. For some purposes, a +truncated host name is good enough. If it is, you can ignore the +error code. +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun int sethostname (const char *@var{name}, size_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on unix; implemented in terms of hurd_set_host_config +@c on hurd. +The @code{sethostname} function sets the host name of the system that +calls it to @var{name}, a string with length @var{length}. Only +privileged processes are permitted to do this. + +Usually @code{sethostname} gets called just once, at system boot time. +Often, the program that calls it sets it to the value it finds in the +file @code{/etc/hostname}. +@cindex /etc/hostname + +Be sure to set the host name to the full host name, not just the DNS +hostname (see above). + +The return value is @code{0} on success and @code{-1} on failure. +The following @code{errno} error condition is defined for this function: + +@table @code +@item EPERM +This process cannot set the host name because it is not privileged. +@end table +@end deftypefun + +@comment unistd.h +@comment ??? +@deftypefun int getdomainname (char *@var{name}, size_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Syscalls uname, then strlen and memcpy. +@cindex NIS domain name +@cindex YP domain name + +@code{getdomainname} returns the NIS (aka YP) domain name of the system +on which it is called. Note that this is not the more popular DNS +domain name. Get that with @code{gethostname}. + +The specifics of this function are analogous to @code{gethostname}, above. + +@end deftypefun + +@comment unistd.h +@comment ??? 
+@deftypefun int setdomainname (const char *@var{name}, size_t @var{length}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall. +@cindex NIS domain name +@cindex YP domain name + +@code{setdomainname} sets the NIS (aka YP) domain name of the system +on which it is called. Note that this is not the more popular DNS +domain name. Set that with @code{sethostname}. + +The specifics of this function are analogous to @code{sethostname}, above. + +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun {long int} gethostid (void) +@safety{@prelim{}@mtsafe{@mtshostid{} @mtsenv{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@aculock{} @acucorrupt{} @acsmem{} @acsfd{}}} +@c On HURD, calls _hurd_get_host_config and strtol. On Linux, open +@c HOSTIDFILE, reads an int32_t and closes; if that fails, it calls +@c gethostname and gethostbyname_r to use the h_addr. +This function returns the ``host ID'' of the machine the program is +running on. By convention, this is usually the primary Internet IP address +of that machine, converted to a @w{@code{long int}}. However, on some +systems it is a meaningless but unique number which is hard-coded for +each machine. + +This is not widely used. It arose in BSD 4.2, but was dropped in BSD 4.4. +It is not required by POSIX. + +The proper way to query the IP address is to use @code{gethostbyname} +on the results of @code{gethostname}. For more information on IP addresses, +see @ref{Host Addresses}. +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun int sethostid (long int @var{id}) +@safety{@prelim{}@mtunsafe{@mtasuconst{:@mtshostid{}}}@asunsafe{}@acunsafe{@acucorrupt{} @acsfd{}}} +The @code{sethostid} function sets the ``host ID'' of the host machine +to @var{id}. Only privileged processes are permitted to do this. Usually +it happens just once, at system boot time. 
+ +The proper way to establish the primary IP address of a system +is to configure the IP address resolver to associate that IP address with +the system's host name as returned by @code{gethostname}. For example, +put a record for the system in @file{/etc/hosts}. + +See @code{gethostid} above for more information on host IDs. + +The return value is @code{0} on success and @code{-1} on failure. +The following @code{errno} error conditions are defined for this function: + +@table @code +@item EPERM +This process cannot set the host ID because it is not privileged. + +@item ENOSYS +The operating system does not support setting the host ID. On some +systems, the host ID is a meaningless but unique number hard-coded for +each machine. +@end table +@end deftypefun + +@node Platform Type +@section Platform Type Identification + +You can use the @code{uname} function to find out some information about +the type of computer your program is running on. This function and the +associated data type are declared in the header file +@file{sys/utsname.h}. +@pindex sys/utsname.h + +As a bonus, @code{uname} also gives some information identifying the +particular system your program is running on. This is the same information +which you can get with functions targeted to this purpose described in +@ref{Host Identification}. + + +@comment sys/utsname.h +@comment POSIX.1 +@deftp {Data Type} {struct utsname} +The @code{utsname} structure is used to hold information returned +by the @code{uname} function. It has the following members: + +@table @code +@item char sysname[] +This is the name of the operating system in use. + +@item char release[] +This is the current release level of the operating system implementation. + +@item char version[] +This is the current version level within the release of the operating +system. + +@item char machine[] +This is a description of the type of hardware that is in use.
+ +Some systems provide a mechanism to interrogate the kernel directly for +this information. On systems without such a mechanism, @theglibc{} +fills in this field based on the configuration name that was +specified when building and installing the library. + +GNU uses a three-part name to describe a system configuration; the three +parts are @var{cpu}, @var{manufacturer} and @var{system-type}, and they +are separated with dashes. Any possible combination of three names is +potentially meaningful, but most such combinations are meaningless in +practice and even the meaningful ones are not necessarily supported by +any particular GNU program. + +Since the value in @code{machine} is supposed to describe just the +hardware, it consists of the first two parts of the configuration name: +@samp{@var{cpu}-@var{manufacturer}}. For example, it might be one of these: + +@quotation +@code{"sparc-sun"}, +@code{"i386-@var{anything}"}, +@code{"m68k-hp"}, +@code{"m68k-sony"}, +@code{"m68k-sun"}, +@code{"mips-dec"} +@end quotation + +@item char nodename[] +This is the host name of this particular computer. In @theglibc{}, +the value is the same as that returned by @code{gethostname}; +see @ref{Host Identification}. + +@code{gethostname} is implemented with a call to @code{uname}. + +@item char domainname[] +This is the NIS or YP domain name. It is the same value returned by +@code{getdomainname}; see @ref{Host Identification}. This element +is a relatively recent invention and use of it is not as portable as +use of the rest of the structure. + +@c getdomainname() is implemented with a call to uname(). + +@end table +@end deftp + +@comment sys/utsname.h +@comment POSIX.1 +@deftypefun int uname (struct utsname *@var{info}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall on unix; the posix fallback is to call gethostname and +@c then fills in the other fields with constants; on HURD, it calls +@c proc_uname and then gethostname. 
+The @code{uname} function fills in the structure pointed to by +@var{info} with information about the operating system and host machine. +A non-negative return value indicates that the data was successfully stored. + +@code{-1} as the return value indicates an error. The only error possible is +@code{EFAULT}, which we normally don't mention as it is always a +possibility. +@end deftypefun + + +@node Filesystem Handling +@section Controlling and Querying Mounts + +All files are in filesystems, and before you can access any file, its +filesystem must be mounted. Because of Unix's concept of +@emph{Everything is a file}, mounting of filesystems is central to doing +almost anything. This section explains how to find out what filesystems +are currently mounted and what filesystems are available for mounting, +and how to change what is mounted. + +The classic filesystem is the contents of a disk drive. The concept is +considerably more abstract, though, and lots of things other than disk +drives can be mounted. + +Some block devices don't correspond to traditional devices like disk +drives. For example, a loop device is a block device whose driver uses +a regular file in another filesystem as its medium. So if that regular +file contains appropriate data for a filesystem, you can by mounting the +loop device essentially mount a regular file. + +Some filesystems aren't based on a device of any kind. The ``proc'' +filesystem, for example, contains files whose data is made up by the +filesystem driver on the fly whenever you ask for it. And when you +write to it, the data you write causes changes in the system. No data +gets stored. + +@c It would be good to mention NFS mounts here. + +@menu +* Mount Information:: What is or could be mounted? 
+* Mount-Unmount-Remount:: Controlling what is mounted and how +@end menu + +@node Mount Information, Mount-Unmount-Remount, , Filesystem Handling +@subsection Mount Information + +For some programs it is desirable and necessary to access information +about whether a certain filesystem is mounted and, if it is, where, or +simply to get lists of all the available filesystems. @Theglibc{} +provides some functions to retrieve this information portably. + +Traditionally Unix systems have a file named @file{/etc/fstab} which +describes all possibly mounted filesystems. The @code{mount} program +uses this file to mount at startup time of the system all the +necessary filesystems. The information about all the filesystems +actually mounted is normally kept in a file named either +@file{/var/run/mtab} or @file{/etc/mtab}. Both files share the same +syntax and it is crucial that this syntax is followed all the time. +Therefore it is best to never directly write to the files. The functions +described in this section can do this and they also provide the +functionality to convert the external textual representation to the +internal representation. + +Note that the @file{fstab} and @file{mtab} files are maintained on a +system by @emph{convention}. It is possible for the files not to exist +or not to be consistent with what is really mounted or available to +mount, if the system's administration policy allows it. But programs +that mount and unmount filesystems typically maintain and use these +files as described herein. + +@vindex _PATH_FSTAB +@vindex _PATH_MNTTAB +@vindex _PATH_MOUNTED +@vindex FSTAB +@vindex MNTTAB +@vindex MOUNTED +The filenames given above should never be used directly. 
The portable +way to handle these files is to use the macros @code{_PATH_FSTAB}, +defined in @file{fstab.h}, or @code{_PATH_MNTTAB}, defined in +@file{mntent.h} and @file{paths.h}, for @file{fstab}; and the macro +@code{_PATH_MOUNTED}, also defined in @file{mntent.h} and +@file{paths.h}, for @file{mtab}. There are also three alternate macro +names @code{FSTAB}, @code{MNTTAB}, and @code{MOUNTED} defined but +these names are deprecated and kept only for backward compatibility. +The names @code{_PATH_MNTTAB} and @code{_PATH_MOUNTED} should always be used. + +@menu +* fstab:: The @file{fstab} file +* mtab:: The @file{mtab} file +* Other Mount Information:: Other (non-libc) sources of mount information +@end menu + +@node fstab +@subsubsection The @file{fstab} file + +The internal representation for entries of the file is @w{@code{struct +fstab}}, defined in @file{fstab.h}. + +@comment fstab.h +@comment BSD +@deftp {Data Type} {struct fstab} +This structure is used with the @code{getfsent}, @code{getfsspec}, and +@code{getfsfile} functions. + +@table @code +@item char *fs_spec +This element describes the device from which the filesystem is mounted. +Normally this is the name of a special device, such as a hard disk +partition, but it could also be a more or less generic string. For +@dfn{NFS} it would be a hostname and directory name combination. + +Even though the element is not declared @code{const} it shouldn't be +modified. The missing @code{const} has historic reasons, since this +structure predates @w{ISO C}. The same is true for the other string +elements of this structure. + +@item char *fs_file +This describes the mount point on the local system. I.e., accessing any +file in this filesystem has implicitly or explicitly this string as a +prefix. + +@item char *fs_vfstype +This is the type of the filesystem. Depending on what the underlying +kernel understands it can be any string.
+ +@item char *fs_mntops +This is a string containing options passed to the kernel with the +@code{mount} call. Again, this can be almost anything. There can be +more than one option, separated from the others by a comma. Each option +consists of a name and an optional value part, introduced by an @code{=} +character. + +If the value of this element must be processed it should ideally be done +using the @code{getsubopt} function; see @ref{Suboptions}. + +@item const char *fs_type +This name is poorly chosen. This element points to a string (possibly +in the @code{fs_mntops} string) which describes the modes with which the +filesystem is mounted. @file{fstab.h} defines five macros to describe the +possible values: + +@vtable @code +@item FSTAB_RW +The filesystem gets mounted with read and write enabled. +@item FSTAB_RQ +The filesystem gets mounted with read and write enabled. Write access +is restricted by quotas. +@item FSTAB_RO +The filesystem gets mounted read-only. +@item FSTAB_SW +This is not a real filesystem, it is a swap device. +@item FSTAB_XX +This entry from the @file{fstab} file is totally ignored. +@end vtable + +Testing for equality with these values must happen using @code{strcmp} +since these are all strings. Comparing the pointer will probably always +fail. + +@item int fs_freq +This element describes the dump frequency in days. + +@item int fs_passno +This element describes the pass number on parallel dumps. It is closely +related to the @code{dump} utility used on Unix systems. +@end table +@end deftp + + +To read the entire content of the @file{fstab} file @theglibc{} +contains a set of three functions which are designed in the usual way.
+ +@comment fstab.h +@comment BSD +@deftypefun int setfsent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:fsent}}@asunsafe{@ascuheap{} @asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c setfsent @mtasurace:fsent @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsmem @acsfd +@c fstab_init(1) @mtasurace:fsent @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsmem @acsfd +@c malloc dup @ascuheap @acsmem +@c rewind dup @asucorrupt @acucorrupt [no @aculock] +@c setmntent dup @ascuheap @asulock @acsmem @acsfd @aculock +This function makes sure that the internal read pointer for the +@file{fstab} file is at the beginning of the file. This is done by +either opening the file or resetting the read pointer. + +Since the file handle is internal to the libc this function is not +thread-safe. + +This function returns a non-zero value if the operation was successful +and the @code{getfs*} functions can be used to read the entries of the +file. +@end deftypefun + +@comment fstab.h +@comment BSD +@deftypefun void endfsent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:fsent}}@asunsafe{@ascuheap{} @asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c endfsent @mtasurace:fsent @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsmem @acsfd +@c endmntent dup @ascuheap @asulock @aculock @acsmem @acsfd +This function makes sure that all resources acquired by a prior call to +@code{setfsent} (explicitly or implicitly by calling @code{getfsent}) are +freed. 
+@end deftypefun + +@comment fstab.h +@comment BSD +@deftypefun {struct fstab *} getfsent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:fsent} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c getfsent @mtasurace:fsent @mtslocale @asucorrupt @ascuheap @asulock @acucorrupt @aculock @acsmem +@c fstab_init(0) dup @mtasurace:fsent @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsmem @acsfd +@c fstab_fetch @mtasurace:fsent @mtslocale @asucorrupt @ascuheap @acucorrupt @aculock @acsmem +@c getmntent_r dup @mtslocale @asucorrupt @ascuheap @acucorrupt @aculock @acsmem +@c fstab_convert @mtasurace:fsent +@c hasmntopt dup ok +This function returns the next entry of the @file{fstab} file. If this +is the first call to any of the functions handling @file{fstab} since +program start or the last call of @code{endfsent}, the file will be +opened. + +The function returns a pointer to a variable of type @code{struct +fstab}. This variable is shared by all threads and therefore this +function is not thread-safe. If an error occurred @code{getfsent} +returns a @code{NULL} pointer. +@end deftypefun + +@comment fstab.h +@comment BSD +@deftypefun {struct fstab *} getfsspec (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:fsent} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c getffsspec @mtasurace:fsent @mtslocale @asucorrupt @ascuheap @asulock @acucorrupt @aculock @acsmem +@c fstab_init(1) dup @mtasurace:fsent @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsmem @acsfd +@c fstab_fetch dup @mtasurace:fsent @mtslocale @asucorrupt @ascuheap @acucorrupt @aculock @acsmem +@c strcmp dup ok +@c fstab_convert dup @mtasurace:fsent +This function returns the next entry of the @file{fstab} file which has +a string equal to @var{name} pointed to by the @code{fs_spec} element. 
+Since there is normally exactly one entry for each special device it +makes no sense to call this function more than once for the same +argument. If this is the first call to any of the functions handling +@file{fstab} since program start or the last call of @code{endfsent}, +the file will be opened. + +The function returns a pointer to a variable of type @code{struct +fstab}. This variable is shared by all threads and therefore this +function is not thread-safe. If an error occurred @code{getfsspec} +returns a @code{NULL} pointer. +@end deftypefun + +@comment fstab.h +@comment BSD +@deftypefun {struct fstab *} getfsfile (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:fsent} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c getfsfile @mtasurace:fsent @mtslocale @asucorrupt @ascuheap @asulock @acucorrupt @aculock @acsmem +@c fstab_init(1) dup @mtasurace:fsent @ascuheap @asucorrupt @asulock @acucorrupt @aculock @acsmem @acsfd +@c fstab_fetch dup @mtasurace:fsent @mtslocale @asucorrupt @ascuheap @acucorrupt @aculock @acsmem +@c strcmp dup ok +@c fstab_convert dup @mtasurace:fsent +This function returns the next entry of the @file{fstab} file which has +a string equal to @var{name} pointed to by the @code{fs_file} element. +Since there is normally exactly one entry for each mount point it +makes no sense to call this function more than once for the same +argument. If this is the first call to any of the functions handling +@file{fstab} since program start or the last call of @code{endfsent}, +the file will be opened. + +The function returns a pointer to a variable of type @code{struct +fstab}. This variable is shared by all threads and therefore this +function is not thread-safe. If an error occurred @code{getfsfile} +returns a @code{NULL} pointer. +@end deftypefun + + +@node mtab +@subsubsection The @file{mtab} file +The following functions and data structure access the @file{mtab} file.
+ +@comment mntent.h +@comment BSD +@deftp {Data Type} {struct mntent} +This structure is used with the @code{getmntent}, @code{getmntent_r}, +@code{addmntent}, and @code{hasmntopt} functions. + +@table @code +@item char *mnt_fsname +This element contains a pointer to a string describing the name of the +special device from which the filesystem is mounted. It corresponds to +the @code{fs_spec} element in @code{struct fstab}. + +@item char *mnt_dir +This element points to a string describing the mount point of the +filesystem. It corresponds to the @code{fs_file} element in +@code{struct fstab}. + +@item char *mnt_type +@code{mnt_type} describes the filesystem type and is therefore +equivalent to @code{fs_vfstype} in @code{struct fstab}. @file{mntent.h} +defines a few symbolic names for some of the values this string can have. +But since the kernel can support arbitrary filesystems it does not +make much sense to give them symbolic names. If one knows the symbol +name one also knows the filesystem name. Nevertheless here follows the +list of the symbols provided in @file{mntent.h}. + +@vtable @code +@item MNTTYPE_IGNORE +This symbol expands to @code{"ignore"}. The value is sometimes used in +@file{fstab} files to make sure entries are not used without removing them. +@item MNTTYPE_NFS +Expands to @code{"nfs"}. Using this macro sometimes could make sense +since it names the default NFS implementation, in case both version 2 +and 3 are supported. +@item MNTTYPE_SWAP +This symbol expands to @code{"swap"}. It names the special @file{fstab} +entry which names one of the possibly multiple swap partitions. +@end vtable + +@item char *mnt_opts +The element contains a string describing the options used while mounting +the filesystem. As for the equivalent element @code{fs_mntops} of +@code{struct fstab} it is best to use the function @code{getsubopt} +(@pxref{Suboptions}) to access the parts of this string. 
+ +The @file{mntent.h} file defines a number of macros with string values +which correspond to some of the options understood by the kernel. There +might be many more options which are possible so it doesn't make much sense +to rely on these macros but to be consistent here is the list: + +@vtable @code +@item MNTOPT_DEFAULTS +Expands to @code{"defaults"}. This option should be used alone since it +indicates all values for the customizable values are chosen to be the +default. +@item MNTOPT_RO +Expands to @code{"ro"}. See the @code{FSTAB_RO} value, it means the +filesystem is mounted read-only. +@item MNTOPT_RW +Expands to @code{"rw"}. See the @code{FSTAB_RW} value, it means the +filesystem is mounted with read and write permissions. +@item MNTOPT_SUID +Expands to @code{"suid"}. This means that the SUID bit (@pxref{How +Change Persona}) is respected when a program from the filesystem is +started. +@item MNTOPT_NOSUID +Expands to @code{"nosuid"}. This is the opposite of @code{MNTOPT_SUID}, +the SUID bit for all files from the filesystem is ignored. +@item MNTOPT_NOAUTO +Expands to @code{"noauto"}. At startup time the @code{mount} program +will ignore this entry if it is started with the @code{-a} option to +mount all filesystems mentioned in the @file{fstab} file. +@end vtable + +As for the @code{FSTAB_*} entries introduced above it is important to +use @code{strcmp} to check for equality. + +@item int mnt_freq +This element corresponds to @code{fs_freq} and also specifies the +frequency in days in which dumps are made. + +@item int mnt_passno +This element is equivalent to @code{fs_passno} with the same meaning +which is uninteresting for all programs besides @code{dump}. +@end table +@end deftp + +For accessing the @file{mtab} file there is again a set of three +functions to access all entries in a row. Unlike the functions to +handle @file{fstab} these functions do not access a fixed file and there +is even a thread safe variant of the get function.
Besides this @theglibc{} +contains functions to alter the file and test for specific options. + +@comment mntent.h +@comment BSD +@deftypefun {FILE *} setmntent (const char *@var{file}, const char *@var{mode}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@acsmem{} @acsfd{} @aculock{}}} +@c setmntent @ascuheap @asulock @acsmem @acsfd @aculock +@c strlen dup ok +@c mempcpy dup ok +@c memcpy dup ok +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking dup ok [no @mtasurace:stream @asulock: exclusive stream] +The @code{setmntent} function prepares the file named @var{file} which +must be in the format of a @file{fstab} or @file{mtab} file for the +upcoming processing through the other functions of the family. The +@var{mode} parameter can be chosen in the way the @var{opentype} +parameter for @code{fopen} (@pxref{Opening Streams}) can be chosen. If +the file is opened for writing the file is also allowed to be empty. + +If the file was successfully opened @code{setmntent} returns a file +handle for future use. Otherwise the return value is @code{NULL} +and @code{errno} is set accordingly. +@end deftypefun + +@comment mntent.h +@comment BSD +@deftypefun int endmntent (FILE *@var{stream}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c endmntent @ascuheap @asulock @aculock @acsmem @acsfd +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +This function takes for the @var{stream} parameter a file handle which +previously was returned from the @code{setmntent} call. +@code{endmntent} closes the stream and frees all resources. + +The return value is @math{1} unless an error occurred in which case it +is @math{0}.
+@end deftypefun + +@comment mntent.h +@comment BSD +@deftypefun {struct mntent *} getmntent (FILE *@var{stream}) +@safety{@prelim{}@mtunsafe{@mtasurace{:mntentbuf} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asuinit{}}@acunsafe{@acuinit{} @acucorrupt{} @aculock{} @acsmem{}}} +@c getmntent @mtasurace:mntentbuf @mtslocale @asucorrupt @ascuheap @asuinit @acuinit @acucorrupt @aculock @acsmem +@c libc_once @ascuheap @asuinit @acuinit @acsmem +@c allocate @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c getmntent_r dup @mtslocale @asucorrupt @ascuheap @acucorrupt @aculock @acsmem +The @code{getmntent} function takes as the parameter a file handle +previously returned by a successful call to @code{setmntent}. It returns +a pointer to a static variable of type @code{struct mntent} which is +filled with the information from the next entry from the file currently +read. + +The file format used prescribes the use of spaces or tab characters to +separate the fields. This makes it harder to use names containing one +of these characters (e.g., mount points using spaces). Therefore +these characters are encoded in the files and the @code{getmntent} +function takes care of the decoding while reading the entries back in. +@code{'\040'} is used to encode a space character, @code{'\011'} to +encode a tab character, @code{'\012'} to encode a newline character, +and @code{'\\'} to encode a backslash. + +If there was an error or the end of the file is reached the return value +is @code{NULL}. + +This function is not thread-safe since all calls to this function return +a pointer to the same static variable. @code{getmntent_r} should be +used in situations where multiple threads access the file. 
+@end deftypefun + +@comment mntent.h +@comment BSD +@deftypefun {struct mntent *} getmntent_r (FILE *@var{stream}, struct mntent *@var{result}, char *@var{buffer}, int @var{bufsize}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{}}} +@c getmntent_r @mtslocale @asucorrupt @ascuheap @acucorrupt @aculock @acsmem +@c flockfile dup @aculock +@c fgets_unlocked dup @asucorrupt @acucorrupt [locked, so no @mtsrace:stream] +@c funlockfile dup @aculock +@c strchr dup ok +@c strspn dup ok +@c strsep dup ok +@c decode_name ok +@c sscanf dup @mtslocale @ascuheap @acsmem +The @code{getmntent_r} function is the reentrant variant of +@code{getmntent}. It also returns the next entry from the file and +returns a pointer. The actual variable the values are stored in is not +static, though. Instead the function stores the values in the variable +pointed to by the @var{result} parameter. Additional information (e.g., +the strings pointed to by the elements of the result) is kept in the +buffer of size @var{bufsize} pointed to by @var{buffer}. + +Escaped characters (space, tab, backslash) are converted back in the +same way as it happens for @code{getmntent}. + +The function returns a @code{NULL} pointer in error cases. Errors could be: +@itemize @bullet +@item +error while reading the file, +@item +end of file reached, +@item +@var{bufsize} is too small for reading a complete new entry.
+@end itemize +@end deftypefun + +@comment mntent.h +@comment BSD +@deftypefun int addmntent (FILE *@var{stream}, const struct mntent *@var{mnt}) +@safety{@prelim{}@mtsafe{@mtsrace{:stream} @mtslocale{}}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{}}} +@c addmntent @mtasurace:stream @mtslocale @asucorrupt @acucorrupt +@c fseek dup @asucorrupt @acucorrupt [no @aculock] +@c encode_name ok +@c fprintf dup @mtslocale @asucorrupt @acucorrupt [no @ascuheap @acsmem, no @aculock] +@c fflush dup @asucorrupt @acucorrupt [no @aculock] +The @code{addmntent} function allows adding a new entry to the file +previously opened with @code{setmntent}. The new entries are always +appended. I.e., even if the position of the file descriptor is not at +the end of the file this function does not overwrite an existing entry +following the current position. + +The implication of this is that to remove an entry from a file one has +to create a new file while leaving out the entry to be removed and after +closing the file remove the old one and rename the new file to the +chosen name. + +This function takes care of spaces and tab characters in the names to be +written to the file. It converts them and the backslash character into +the format described in the @code{getmntent} description above. + +This function returns @math{0} in case the operation was successful. +Otherwise the return value is @math{1} and @code{errno} is set +appropriately. +@end deftypefun + +@comment mntent.h +@comment BSD +@deftypefun {char *} hasmntopt (const struct mntent *@var{mnt}, const char *@var{opt}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c hasmntopt ok +@c strlen dup ok +@c strstr dup ok +@c strchr dup ok +This function can be used to check whether the string pointed to by the +@code{mnt_opts} element of the variable pointed to by @var{mnt} contains +the option @var{opt}. If this is true a pointer to the beginning of the +option in the @code{mnt_opts} element is returned. 
If no such option +exists the function returns @code{NULL}. + +This function is useful to test whether a specific option is present but +when all options have to be processed one is better off with using the +@code{getsubopt} function to iterate over all options in the string. +@end deftypefun + +@node Other Mount Information +@subsubsection Other (Non-libc) Sources of Mount Information + +On a system with a Linux kernel and the @code{proc} filesystem, you can +get information on currently mounted filesystems from the file +@file{mounts} in the @code{proc} filesystem. Its format is similar to +that of the @file{mtab} file, but represents what is truly mounted +without relying on facilities outside the kernel to keep @file{mtab} up +to date. + + +@node Mount-Unmount-Remount, , Mount Information, Filesystem Handling +@subsection Mount, Unmount, Remount + +This section describes the functions for mounting, unmounting, and +remounting filesystems. + +Only the superuser can mount, unmount, or remount a filesystem. + +These functions do not access the @file{fstab} and @file{mtab} files. You +should maintain and use these separately. @xref{Mount Information}. + +The symbols in this section are declared in @file{sys/mount.h}. + +@comment sys/mount.h +@comment SVID, BSD +@deftypefun {int} mount (const char *@var{special_file}, const char *@var{dir}, const char *@var{fstype}, unsigned long int @var{options}, const void *@var{data}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall. + +@code{mount} mounts or remounts a filesystem. The two operations are +quite different and are merged rather unnaturally into this one function. +The @code{MS_REMOUNT} option, explained below, determines whether +@code{mount} mounts or remounts. + +For a mount, the filesystem on the block device represented by the +device special file named @var{special_file} gets mounted over the mount +point @var{dir}. 
This means that the directory @var{dir} (along with any +files in it) is no longer visible; in its place (and still with the name +@var{dir}) is the root directory of the filesystem on the device. + +As an exception, if the filesystem type (see below) is one which is not +based on a device (e.g. ``proc''), @code{mount} instantiates a +filesystem and mounts it over @var{dir} and ignores @var{special_file}. + +For a remount, @var{dir} specifies the mount point where the filesystem +to be remounted is (and remains) mounted and @var{special_file} is +ignored. Remounting a filesystem means changing the options that control +operations on the filesystem while it is mounted. It does not mean +unmounting and mounting again. + +For a mount, you must identify the type of the filesystem with +@var{fstype}. This type tells the kernel how to access the filesystem +and can be thought of as the name of a filesystem driver. The +acceptable values are system dependent. On a system with a Linux kernel +and the @code{proc} filesystem, the list of possible values is in the +file @file{filesystems} in the @code{proc} filesystem (e.g. type +@kbd{cat /proc/filesystems} to see the list). With a Linux kernel, the +types of filesystems that @code{mount} can mount, and their type names, +depends on what filesystem drivers are configured into the kernel or +loaded as loadable kernel modules. An example of a common value for +@var{fstype} is @code{ext2}. + +For a remount, @code{mount} ignores @var{fstype}. + +@c This is traditionally called "rwflag" for historical reasons. +@c No point in confusing people today, though. +@var{options} specifies a variety of options that apply until the +filesystem is unmounted or remounted. The precise meaning of an option +depends on the filesystem and with some filesystems, an option may have +no effect at all. 
Furthermore, for some filesystems, some of these +options (but never @code{MS_RDONLY}) can be overridden for individual +file accesses via @code{ioctl}. + +@var{options} is a bit string with bit fields defined using the +following mask and masked value macros: + +@vtable @code +@item MS_MGC_MASK +This multibit field contains a magic number. If it does not have the value +@code{MS_MGC_VAL}, @code{mount} assumes all the following bits are zero and +the @var{data} argument is a null string, regardless of their actual values. + +@item MS_REMOUNT +This bit on means to remount the filesystem. Off means to mount it. +@c There is a mask MS_RMT_MASK in mount.h that says only two of the options +@c can be reset by remount. But the Linux kernel has its own version of +@c MS_RMT_MASK that says they all can be reset. As far as I can tell, +@c libc just passes the arguments straight through to the kernel. + +@item MS_RDONLY +This bit on specifies that no writing to the filesystem shall be allowed +while it is mounted. This cannot be overridden by @code{ioctl}. This +option is available on nearly all filesystems. + +@item MS_NOSUID +This bit on specifies that Setuid and Setgid permissions on files in the +filesystem shall be ignored while it is mounted. + +@item MS_NOEXEC +This bit on specifies that no files in the filesystem shall be executed +while the filesystem is mounted. + +@item MS_NODEV +This bit on specifies that no device special files in the filesystem +shall be accessible while the filesystem is mounted. + +@item MS_SYNCHRONOUS +This bit on specifies that all writes to the filesystem while it is +mounted shall be synchronous; i.e., data shall be synced before each +write completes rather than held in the buffer cache. + +@item MS_MANDLOCK +This bit on specifies that mandatory locks on files shall be permitted while +the filesystem is mounted. 
+ +@item MS_NOATIME +This bit on specifies that access times of files shall not be updated when +the files are accessed while the filesystem is mounted. + +@item MS_NODIRATIME +This bit on specifies that access times of directories shall not be updated +when the directories are accessed while the filesystem is mounted. + +@c there is also S_QUOTA Linux fs.h (mount.h still uses its former name +@c S_WRITE), but I can't see what it does. Turns on quotas, I guess. + +@end vtable + +Any bits not covered by the above masks should be set off; otherwise, +results are undefined. + +The meaning of @var{data} depends on the filesystem type and is controlled +entirely by the filesystem driver in the kernel. + +Example: + +@smallexample +@group +#include <sys/mount.h> + +mount("/dev/hdb", "/cdrom", MS_MGC_VAL | MS_RDONLY | MS_NOSUID, ""); + +mount("/dev/hda2", "/mnt", MS_MGC_VAL | MS_REMOUNT, ""); + +@end group +@end smallexample + +Appropriate arguments for @code{mount} are conventionally recorded in +the @file{fstab} table. @xref{Mount Information}. + +The return value is zero if the mount or remount is successful. Otherwise, +it is @code{-1} and @code{errno} is set appropriately. The values of +@code{errno} are filesystem dependent, but here is a general list: + +@table @code +@item EPERM +The process is not superuser. +@item ENODEV +The file system type @var{fstype} is not known to the kernel. +@item ENOTBLK +The file @var{special_file} is not a block device special file. +@item EBUSY + +@itemize @bullet + +@item +The device is already mounted. + +@item +The mount point is busy. (E.g. it is some process' working directory or +has a filesystem mounted on it already). + +@item +The request is to remount read-only, but there are files open for writing. +@end itemize + +@item EINVAL +@itemize @bullet + +@item +A remount was attempted, but there is no filesystem mounted over the +specified mount point. + +@item +The supposed filesystem has an invalid superblock.
+ +@end itemize + +@item EACCES +@itemize @bullet + +@item +The filesystem is inherently read-only (possibly due to a switch on the +device) and the process attempted to mount it read/write (by setting the +@code{MS_RDONLY} bit off). + +@item +@var{special_file} or @var{dir} is not accessible due to file permissions. + +@item +@var{special_file} is not accessible because it is in a filesystem that is +mounted with the @code{MS_NODEV} option. + +@end itemize + +@item EMFILE +The table of dummy devices is full. @code{mount} needs to create a +dummy device (aka ``unnamed'' device) if the filesystem being mounted is +not one that uses a device. + +@end table + +@end deftypefun + + +@comment sys/mount.h +@comment GNU +@deftypefun {int} umount2 (const char *@var{file}, int @var{flags}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall. + +@code{umount2} unmounts a filesystem. + +You can identify the filesystem to unmount either by the device special +file that contains the filesystem or by the mount point. The effect is +the same. Specify either as the string @var{file}. + +@var{flags} contains the one-bit field identified by the following +mask macro: + +@vtable @code + +@item MNT_FORCE +This bit on means to force the unmounting even if the filesystem is +busy, by making it unbusy first. If the bit is off and the filesystem is +busy, @code{umount2} fails with @code{errno} = @code{EBUSY}. Depending +on the filesystem, this may override all, some, or no busy conditions. + +@end vtable + +All other bits in @var{flags} should be set to zero; otherwise, the result +is undefined. + +Example: + +@smallexample +@group +#include <sys/mount.h> + +umount2("/mnt", MNT_FORCE); + +umount2("/dev/hdd1", 0); + +@end group +@end smallexample + +After the filesystem is unmounted, the directory that was the mount point +is visible, as are any files in it. + +As part of unmounting, @code{umount2} syncs the filesystem.
+ +If the unmounting is successful, the return value is zero. Otherwise, it +is @code{-1} and @code{errno} is set accordingly: + +@table @code +@item EPERM +The process is not superuser. +@item EBUSY +The filesystem cannot be unmounted because it is busy. E.g. it contains +a directory that is some process's working directory or a file that some +process has open. With some filesystems in some cases, you can avoid +this failure with the @code{MNT_FORCE} option. + +@item EINVAL +@var{file} validly refers to a file, but that file is neither a mount +point nor a device special file of a currently mounted filesystem. + +@end table + +This function is not available on all systems. +@end deftypefun + +@comment sys/mount.h +@comment SVID, GNU +@deftypefun {int} umount (const char *@var{file}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall or wrapper for umount2. + +@code{umount} does the same thing as @code{umount2} with @var{flags} set +to zeroes. It is more widely available than @code{umount2}, but because it +cannot forcefully unmount a filesystem, it is deprecated +when @code{umount2} is also available. +@end deftypefun + + + +@node System Parameters +@section System Parameters + +This section describes the @code{sysctl} function, which gets and sets +a variety of system parameters. + +The symbols used in this section are declared in the file @file{sys/sysctl.h}. + +@comment sys/sysctl.h +@comment BSD +@deftypefun int sysctl (int *@var{names}, int @var{nlen}, void *@var{oldval}, size_t *@var{oldlenp}, void *@var{newval}, size_t @var{newlen}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct syscall, Linux only. + +@code{sysctl} gets or sets a specified system parameter.
There are so +many of these parameters that it is not practical to list them all here, +but here are some examples: + +@itemize @bullet +@item network domain name +@item paging parameters +@item network Address Resolution Protocol timeout time +@item maximum number of files that may be open +@item root filesystem device +@item when kernel was built +@end itemize + +The set of available parameters depends on the kernel configuration and +can change while the system is running, particularly when you load and +unload loadable kernel modules. + +The system parameters with which @code{sysctl} is concerned are arranged +in a hierarchical structure like a hierarchical filesystem. To identify +a particular parameter, you specify a path through the structure in a +way analogous to specifying the pathname of a file. Each component of +the path is specified by an integer and each of these integers has a +macro defined for it by @file{sys/sysctl.h}. @var{names} is the path, in +the form of an array of integers. Each component of the path is one +element of the array, in order. @var{nlen} is the number of components +in the path. + +For example, the first component of the path for all the paging +parameters is the value @code{CTL_VM}. For the free page thresholds, the +second component of the path is @code{VM_FREEPG}. So to get the free +page threshold values, make @var{names} an array containing the two +elements @code{CTL_VM} and @code{VM_FREEPG} and make @var{nlen} = 2. + + +The format of the value of a parameter depends on the parameter. +Sometimes it is an integer; sometimes it is an ASCII string; sometimes +it is an elaborate structure. In the case of the free page thresholds +used in the example above, the parameter value is a structure containing +several integers. + +In any case, you identify a place to return the parameter's value with +@var{oldval} and specify the amount of storage available at that +location as *@var{oldlenp}. 
*@var{oldlenp} does double duty because it +is also the output location that contains the actual length of the +returned value. + +If you don't want the parameter value returned, specify a null pointer +for @var{oldval}. + +To set the parameter, specify the address and length of the new value +as @var{newval} and @var{newlen}. If you don't want to set the parameter, +specify a null pointer as @var{newval}. + +If you get and set a parameter in the same @code{sysctl} call, the value +returned is the value of the parameter before it was set. + +Each system parameter has a set of permissions similar to the +permissions for a file (including the permissions on directories in its +path) that determine whether you may get or set it. For the purposes of +these permissions, every parameter is considered to be owned by the +superuser and Group 0 so processes with that effective uid or gid may +have more access to system parameters. Unlike with files, the superuser +does not invariably have full permission to all system parameters, because +some of them are designed not to be changed ever. + + +@code{sysctl} returns a zero return value if it succeeds. Otherwise, it +returns @code{-1} and sets @code{errno} appropriately. Besides the +failures that apply to all system calls, the following are the +@code{errno} codes for all possible failures: + +@table @code +@item EPERM +The process is not permitted to access one of the components of the +path of the system parameter or is not permitted to access the system parameter +itself in the way (read or write) that it requested. +@c There is some indication in the Linux 2.2 code that the code is trying to +@c return EACCES here, but the EACCES value never actually makes it to the +@c user. +@item ENOTDIR +There is no system parameter corresponding to @var{names}. +@item EFAULT +@var{oldval} is not null, which means the process wanted to read the parameter, +but *@var{oldlenp} is zero, so there is no place to return it.
+@item EINVAL +@itemize @bullet +@item +The process attempted to set a system parameter to a value that is not valid +for that parameter. +@item +The space provided for the return of the system parameter is not the right +size for that parameter. +@end itemize +@item ENOMEM +This value may be returned instead of the more correct @code{EINVAL} in some +cases where the space provided for the return of the system parameter is too +small. + +@end table + +@end deftypefun + +If you have a Linux kernel with the @code{proc} filesystem, you can get +and set most of the same parameters by reading and writing to files in +the @code{sys} directory of the @code{proc} filesystem. In the @code{sys} +directory, the directory structure represents the hierarchical structure +of the parameters. E.g. you can display the free page thresholds with +@smallexample +cat /proc/sys/vm/freepages +@end smallexample +@c In Linux, the sysctl() and /proc instances of the parameter are created +@c together. The proc filesystem accesses the same data structure as +@c sysctl(), which has special fields in it for /proc. But it is still +@c possible to create a sysctl-only parameter. + +Some more traditional and more widely available, though less general, +@glibcadj{} functions for getting and setting some of the same system +parameters are: + +@itemize @bullet +@item +@code{getdomainname}, @code{setdomainname} +@item +@code{gethostname}, @code{sethostname} (@xref{Host Identification}.) +@item +@code{uname} (@xref{Platform Type}.) +@end itemize diff --git a/REORG.TODO/manual/syslog.texi b/REORG.TODO/manual/syslog.texi new file mode 100644 index 0000000000..7b73a091fe --- /dev/null +++ b/REORG.TODO/manual/syslog.texi @@ -0,0 +1,578 @@ +@node Syslog, Mathematics, Low-Level Terminal Interface, Top +@c %MENU% System logging and messaging +@chapter Syslog + + +This chapter describes facilities for issuing and logging messages of +system administration interest. 
This chapter has nothing to do with +programs issuing messages to their own users or keeping private logs +(One would typically do that with the facilities described in +@ref{I/O on Streams}). + +Most systems have a facility called ``Syslog'' that allows programs to +submit messages of interest to system administrators and can be +configured to pass these messages on in various ways, such as printing +on the console, mailing to a particular person, or recording in a log +file for future reference. + +A program uses the facilities in this chapter to submit such messages. + +@menu +* Overview of Syslog:: Overview of a system's Syslog facility +* Submitting Syslog Messages:: Functions to submit messages to Syslog +@end menu + +@node Overview of Syslog +@section Overview of Syslog + +System administrators have to deal with lots of different kinds of +messages from a plethora of subsystems within each system, and usually +lots of systems as well. For example, an FTP server might report every +connection it gets. The kernel might report hardware failures on a disk +drive. A DNS server might report usage statistics at regular intervals. + +Some of these messages need to be brought to a system administrator's +attention immediately. And it may not be just any system administrator +-- there may be a particular system administrator who deals with a +particular kind of message. Other messages just need to be recorded for +future reference if there is a problem. Still others may need to have +information extracted from them by an automated process that generates +monthly reports. + +To deal with these messages, most Unix systems have a facility called +``Syslog.'' It is generally based on a daemon called ``Syslogd''. +Syslogd listens for messages on a Unix domain socket named +@file{/dev/log}. Based on classification information in the messages +and its configuration file (usually @file{/etc/syslog.conf}), Syslogd +routes them in various ways.
Some of the popular routings are: + +@itemize @bullet +@item +Write to the system console +@item +Mail to a specific user +@item +Write to a log file +@item +Pass to another daemon +@item +Discard +@end itemize + +Syslogd can also handle messages from other systems. It listens on the +@code{syslog} UDP port as well as the local socket for messages. + +Syslog can handle messages from the kernel itself. But the kernel +doesn't write to @file{/dev/log}; rather, another daemon (sometimes +called ``Klogd'') extracts messages from the kernel and passes them on to +Syslog as any other process would (and it properly identifies them as +messages from the kernel). + +Syslog can even handle messages that the kernel issued before Syslogd or +Klogd was running. A Linux kernel, for example, stores startup messages +in a kernel message ring and they are normally still there when Klogd +later starts up. Assuming Syslogd is running by the time Klogd starts, +Klogd then passes everything in the message ring to it. + +In order to classify messages for disposition, Syslog requires any process +that submits a message to it to provide two pieces of classification +information with it: + +@table @asis +@item facility +This identifies who submitted the message. There are a small number of +facilities defined. The kernel, the mail subsystem, and an FTP server +are examples of recognized facilities. For the complete list, +see @ref{syslog; vsyslog}. Keep in mind that these are +essentially arbitrary classifications. ``Mail subsystem'' doesn't have any +more meaning than the system administrator gives to it. + +@item priority +This tells how important the content of the message is. Examples of +defined priority values are: debug, informational, warning and critical. +For the complete list, see @ref{syslog; vsyslog}. Except for +the fact that the priorities have a defined order, the meaning of each +of these priorities is entirely determined by the system administrator.
+ +@end table + +A ``facility/priority'' is a number that indicates both the facility +and the priority. + +@strong{Warning:} This terminology is not universal. Some people use +``level'' to refer to the priority and ``priority'' to refer to the +combination of facility and priority. A Linux kernel has a concept of a +message ``level,'' which corresponds both to a Syslog priority and to a +Syslog facility/priority (It can be both because the facility code for +the kernel is zero, and that makes priority and facility/priority the +same value). + +@Theglibc{} provides functions to submit messages to Syslog. They +do it by writing to the @file{/dev/log} socket. @xref{Submitting Syslog +Messages}. + +The @glibcadj{} functions only work to submit messages to the Syslog +facility on the same system. To submit a message to the Syslog facility +on another system, use the socket I/O functions to write a UDP datagram +to the @code{syslog} UDP port on that system. @xref{Sockets}. + + +@node Submitting Syslog Messages +@section Submitting Syslog Messages + +@Theglibc{} provides functions to submit messages to the Syslog +facility: + +@menu +* openlog:: Open connection to Syslog +* syslog; vsyslog:: Submit message to Syslog +* closelog:: Close connection to Syslog +* setlogmask:: Cause certain messages to be ignored +* Syslog Example:: Example of all of the above +@end menu + +These functions only work to submit messages to the Syslog facility on +the same system. To submit a message to the Syslog facility on another +system, use the socket I/O functions to write a UDP datagram to the +@code{syslog} UDP port on that system. @xref{Sockets}. + + + +@node openlog +@subsection openlog + +The symbols referred to in this section are declared in the file +@file{syslog.h}. 
+ +@comment syslog.h +@comment BSD +@deftypefun void openlog (const char *@var{ident}, int @var{option}, int @var{facility}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c openlog @asulock @aculock @acsfd +@c libc_lock_lock @asulock @aculock +@c openlog_internal @acsfd [always guarded by syslog_lock, so no race] +@c strncpy dup ok +@c socket dup @acsfd +@c fcntl dup ok +@c connect dup ok +@c close dup @acsfd +@c cancel_handler(NULL) @aculock +@c libc_lock_unlock @aculock + +@code{openlog} opens or reopens a connection to Syslog in preparation +for submitting messages. + +@var{ident} is an arbitrary identification string which future +@code{syslog} invocations will prefix to each message. This is intended +to identify the source of the message, and people conventionally set it +to the name of the program that will submit the messages. + +If @var{ident} is NULL, or if @code{openlog} is not called, the default +identification string used in Syslog messages will be the program name, +taken from argv[0]. + +Please note that the string pointer @var{ident} will be retained +internally by the Syslog routines. You must not free the memory that +@var{ident} points to. It is also dangerous to pass a reference to an +automatic variable since leaving the scope would mean ending the +lifetime of the variable. If you want to change the @var{ident} string, +you must call @code{openlog} again; overwriting the string pointed to by +@var{ident} is not thread-safe. + +You can cause the Syslog routines to drop the reference to @var{ident} and +go back to the default string (the program name taken from argv[0]), by +calling @code{closelog}: @xref{closelog}. + +In particular, if you are writing code for a shared library that might get +loaded and then unloaded (e.g. 
a PAM module), and you use @code{openlog}, +you must call @code{closelog} before any point where your library might +get unloaded, as in this example: + +@smallexample +#include <syslog.h> + +void +shared_library_function (void) +@{ + openlog ("mylibrary", option, facility); + + syslog (LOG_INFO, "shared library has been invoked"); + + closelog (); +@} +@end smallexample + +Without the call to @code{closelog}, future invocations of @code{syslog} +by the program using the shared library may crash, if the library gets +unloaded and the memory containing the string @code{"mylibrary"} becomes +unmapped. This is a limitation of the BSD syslog interface. + +@code{openlog} may or may not open the @file{/dev/log} socket, depending +on @var{option}. If it does, it tries to open it and connect it as a +stream socket. If that doesn't work, it tries to open it and connect it +as a datagram socket. The socket has the ``Close on Exec'' attribute, +so the kernel will close it if the process performs an exec. + +You don't have to use @code{openlog}. If you call @code{syslog} without +having called @code{openlog}, @code{syslog} just opens the connection +implicitly and uses defaults for the information in @var{ident} and +@var{options}. + +@var{options} is a bit string, with the bits as defined by the following +single bit masks: + +@vtable @code +@item LOG_PERROR +If on, @code{openlog} sets up the connection so that any @code{syslog} +on this connection writes its message to the calling process' Standard +Error stream in addition to submitting it to Syslog. If off, @code{syslog} +does not write the message to Standard Error. + +@item LOG_CONS +If on, @code{openlog} sets up the connection so that a @code{syslog} on +this connection that fails to submit a message to Syslog writes the +message instead to the system console. If off, @code{syslog} does not write +to the system console (but of course Syslog may write messages it +receives to the console).
+ +@item LOG_PID +When on, @code{openlog} sets up the connection so that a @code{syslog} +on this connection inserts the calling process' Process ID (PID) into +the message. When off, @code{openlog} does not insert the PID. + +@item LOG_NDELAY +When on, @code{openlog} opens and connects the @file{/dev/log} socket. +When off, a future @code{syslog} call must open and connect the socket. + +@strong{Portability note:} In early systems, the sense of this bit was +exactly the opposite. + +@item LOG_ODELAY +This bit does nothing. It exists for backward compatibility. + +@end vtable + +If any other bit in @var{options} is on, the result is undefined. + +@var{facility} is the default facility code for this connection. A +@code{syslog} on this connection that specifies default facility causes +this facility to be associated with the message. See @code{syslog} for +possible values. A value of zero means the default, which is +@code{LOG_USER}. + +If a Syslog connection is already open when you call @code{openlog}, +@code{openlog} ``reopens'' the connection. Reopening is like opening +except that if you specify zero for the default facility code, the +default facility code simply remains unchanged and if you specify +LOG_NDELAY and the socket is already open and connected, @code{openlog} +just leaves it that way. + +@c There is a bug in closelog() (glibc 2.1.3) wherein it does not reset the +@c default log facility to LOG_USER, which means the default default log +@c facility could be whatever the default log facility was for a previous +@c Syslog connection. I have documented what the function should be rather +@c than what it is because I think if anyone ever gets concerned, the code +@c will change. + +@end deftypefun + + +@node syslog; vsyslog +@subsection syslog, vsyslog + +The symbols referred to in this section are declared in the file +@file{syslog.h}. + +@c syslog() is implemented as a call to vsyslog(). 
+@comment syslog.h +@comment BSD +@deftypefun void syslog (int @var{facility_priority}, const char *@var{format}, @dots{}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c syslog @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c va_start dup ok +@c vsyslog_chk @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c syslog(INTERNALLOG) dup @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c open_memstream @ascuheap @acsmem +@c stpcpy dup ok +@c getpid dup ok +@c mempcpy dup ok +@c fsetlocking [no @mtasurace:stream @asulock for exclusive stream] +@c fprintf @mtslocale @ascuheap @acsmem [no @asucorrupt @aculock @acucorrupt on temp memstream] +@c time dup ok +@c localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c strftime_l(C) dup @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c ftell dup ok [no @asucorrupt @aculock @acucorrupt on temp memstream] +@c fputs_unlocked dup ok [no @mtasurace:stream @asucorrupt @acucorrupt on temp memstream] +@c putc_unlocked dup ok [no @mtasurace:stream @asucorrupt @acucorrupt on temp memstream] +@c vfprintf/vfprintf_chk dup @mtslocale @ascuheap @acsmem [no @mtasurace:stream @asucorrupt @acucorrupt on temp memstream] +@c fclose dup @ascuheap @acsmem [no @asulock @aculock @acsfd on caller-locked memstream] +@c writev dup ok +@c libc_lock_lock dup @asulock @aculock +@c memset dup ok +@c sigemptyset dup ok +@c sigaction(SIGPIPE) dup @mtasusig:PIPE @acusig:PIPE +@c openlog_internal dup @acsfd +@c send dup ok +@c closelog_internal dup @acsfd +@c open dup @acsfd +@c dprintf dup ok +@c libc_lock_unlock @asulock @aculock +@c free dup @acsuheap @acsmem +@c va_end dup ok + +@code{syslog} submits a message 
to the Syslog facility. It does this by +writing to the Unix domain socket @code{/dev/log}. + +@code{syslog} submits the message with the facility and priority indicated +by @var{facility_priority}. The macro @code{LOG_MAKEPRI} generates a +facility/priority from a facility and a priority, as in the following +example: + +@smallexample +LOG_MAKEPRI(LOG_USER, LOG_WARNING) +@end smallexample + +The possible values for the facility code are (macros): + +@c Internally, there is also LOG_KERN, but LOG_KERN == 0, which means +@c if you try to use it here, just selects default. + +@vtable @code +@item LOG_USER +A miscellaneous user process +@item LOG_MAIL +Mail +@item LOG_DAEMON +A miscellaneous system daemon +@item LOG_AUTH +Security (authorization) +@item LOG_SYSLOG +Syslog +@item LOG_LPR +Central printer +@item LOG_NEWS +Network news (e.g. Usenet) +@item LOG_UUCP +UUCP +@item LOG_CRON +Cron and At +@item LOG_AUTHPRIV +Private security (authorization) +@item LOG_FTP +Ftp server +@item LOG_LOCAL0 +Locally defined +@item LOG_LOCAL1 +Locally defined +@item LOG_LOCAL2 +Locally defined +@item LOG_LOCAL3 +Locally defined +@item LOG_LOCAL4 +Locally defined +@item LOG_LOCAL5 +Locally defined +@item LOG_LOCAL6 +Locally defined +@item LOG_LOCAL7 +Locally defined +@end vtable + +Results are undefined if the facility code is anything else. + +@strong{NB:} @code{syslog} recognizes one other facility code: that of +the kernel. But you can't specify that facility code with these +functions. If you try, it looks the same to @code{syslog} as if you are +requesting the default facility. But you wouldn't want to anyway, +because any program that uses @theglibc{} is not the kernel. + +You can use just a priority code as @var{facility_priority}. In that +case, @code{syslog} assumes the default facility established when the +Syslog connection was opened. @xref{Syslog Example}. 
+ +The possible values for the priority code are (macros): + +@vtable @code +@item LOG_EMERG +The message says the system is unusable. +@item LOG_ALERT +Action on the message must be taken immediately. +@item LOG_CRIT +The message states a critical condition. +@item LOG_ERR +The message describes an error. +@item LOG_WARNING +The message is a warning. +@item LOG_NOTICE +The message describes a normal but important event. +@item LOG_INFO +The message is purely informational. +@item LOG_DEBUG +The message is only for debugging purposes. +@end vtable + +Results are undefined if the priority code is anything else. + +If the process does not presently have a Syslog connection open (i.e., +it did not call @code{openlog}), @code{syslog} implicitly opens the +connection the same as @code{openlog} would, with the following defaults +for information that would otherwise be included in an @code{openlog} +call: The default identification string is the program name. The +default default facility is @code{LOG_USER}. The default for all the +connection options in @var{options} is as if those bits were off. +@code{syslog} leaves the Syslog connection open. + +If the @file{/dev/log} socket is not open and connected, @code{syslog} +opens and connects it, the same as @code{openlog} with the +@code{LOG_NDELAY} option would. + +@code{syslog} leaves @file{/dev/log} open and connected unless its attempt +to send the message failed, in which case @code{syslog} closes it (with the +hope that a future implicit open will restore the Syslog connection to a +usable state). + +Example: + +@smallexample + +#include <syslog.h> +syslog (LOG_MAKEPRI(LOG_LOCAL1, LOG_ERR), + "Unable to make network connection to %s.
Error=%m", host); + +@end smallexample + +@end deftypefun + + +@comment syslog.h +@comment BSD +@deftypefun void vsyslog (int @var{facility_priority}, const char *@var{format}, va_list @var{arglist}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c vsyslog @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c vsyslog_chk dup @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd + +This is functionally identical to @code{syslog}, with the BSD style variable +length argument. + +@end deftypefun + + +@node closelog +@subsection closelog + +The symbols referred to in this section are declared in the file +@file{syslog.h}. + +@comment syslog.h +@comment BSD +@deftypefun void closelog (void) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c closelog @asulock @aculock @acsfd +@c libc_lock_lock @asulock @aculock +@c closelog_internal @acsfd [always guarded by syslog_lock, so no race] +@c close dup@acsfd +@c cancel_handler(NULL) @aculock +@c libc_lock_unlock @aculock + +@code{closelog} closes the current Syslog connection, if there is one. +This includes closing the @file{/dev/log} socket, if it is open. +@code{closelog} also sets the identification string for Syslog messages +back to the default, if @code{openlog} was called with a non-NULL argument +to @var{ident}. The default identification string is the program name +taken from argv[0]. + +If you are writing shared library code that uses @code{openlog} to +generate custom syslog output, you should use @code{closelog} to drop +@theglibc{}'s internal reference to the @var{ident} pointer when you are +done. Please read the section on @code{openlog} for more information: +@xref{openlog}. + +@code{closelog} does not flush any buffers. 
You do not have to call +@code{closelog} before re-opening a Syslog connection with @code{openlog}. +Syslog connections are automatically closed on exec or exit. + +@end deftypefun + + +@node setlogmask +@subsection setlogmask + +The symbols referred to in this section are declared in the file +@file{syslog.h}. + +@comment syslog.h +@comment BSD +@deftypefun int setlogmask (int @var{mask}) +@safety{@prelim{}@mtunsafe{@mtasurace{:LogMask}}@asunsafe{}@acsafe{}} +@c Read and modify are not guarded by syslog_lock, so concurrent changes +@c or even uses are undefined. This should use an atomic swap instead, +@c at least for modifications. + +@code{setlogmask} sets a mask (the ``logmask'') that determines which +future @code{syslog} calls shall be ignored. If a program has not +called @code{setlogmask}, @code{syslog} doesn't ignore any calls. You +can use @code{setlogmask} to specify that messages of particular +priorities shall be ignored in the future. + +A @code{setlogmask} call overrides any previous @code{setlogmask} call. + +Note that the logmask exists entirely independently of opening and +closing of Syslog connections. + +Setting the logmask has a similar effect to, but is not the same as, +configuring Syslog. The Syslog configuration may cause Syslog to +discard certain messages it receives, but the logmask causes certain +messages never to get submitted to Syslog in the first place. + +@var{mask} is a bit string with one bit corresponding to each of the +possible message priorities. If the bit is on, @code{syslog} handles +messages of that priority normally. If it is off, @code{syslog} +discards messages of that priority. 
Use the message priority macros +described in @ref{syslog; vsyslog} and the @code{LOG_MASK} macro to construct +an appropriate @var{mask} value, as in this example: + +@smallexample +LOG_MASK(LOG_EMERG) | LOG_MASK(LOG_ERR) +@end smallexample + +or + +@smallexample +~(LOG_MASK(LOG_INFO)) +@end smallexample + +There is also a @code{LOG_UPTO} macro, which generates a mask with the bits +on for a certain priority and all priorities above it: + +@smallexample +LOG_UPTO(LOG_ERR) +@end smallexample + +The unfortunate naming of the macro is due to the fact that internally, +higher numbers are used for lower message priorities. + +@end deftypefun + + +@node Syslog Example +@subsection Syslog Example + +Here is an example of @code{openlog}, @code{syslog}, and @code{closelog}: + +This example sets the logmask so that debug and informational messages +get discarded without ever reaching Syslog. So the second @code{syslog} +in the example does nothing. + +@smallexample +#include <syslog.h> + +setlogmask (LOG_UPTO (LOG_NOTICE)); + +openlog ("exampleprog", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); + +syslog (LOG_NOTICE, "Program started by User %d", getuid ()); +syslog (LOG_INFO, "A tree falls in a forest"); + +closelog (); + +@end smallexample diff --git a/REORG.TODO/manual/terminal.texi b/REORG.TODO/manual/terminal.texi new file mode 100644 index 0000000000..0c5fdd1a76 --- /dev/null +++ b/REORG.TODO/manual/terminal.texi @@ -0,0 +1,2296 @@ +@node Low-Level Terminal Interface, Syslog, Sockets, Top +@c %MENU% How to change the characteristics of a terminal device +@chapter Low-Level Terminal Interface + +This chapter describes functions that are specific to terminal devices. +You can use these functions to do things like turn off input echoing; +set serial line characteristics such as line speed and flow control; and +change which characters are used for end-of-file, command-line editing, +sending signals, and similar control functions.
+ +Most of the functions in this chapter operate on file descriptors. +@xref{Low-Level I/O}, for more information about what a file +descriptor is and how to open a file descriptor for a terminal device. + +@menu +* Is It a Terminal:: How to determine if a file is a terminal + device, and what its name is. +* I/O Queues:: About flow control and typeahead. +* Canonical or Not:: Two basic styles of input processing. +* Terminal Modes:: How to examine and modify flags controlling + details of terminal I/O: echoing, + signals, editing. Posix. +* BSD Terminal Modes:: BSD compatible terminal mode setting +* Line Control:: Sending break sequences, clearing + terminal buffers @dots{} +* Noncanon Example:: How to read single characters without echo. +* Pseudo-Terminals:: How to open a pseudo-terminal. +@end menu + +@node Is It a Terminal +@section Identifying Terminals +@cindex terminal identification +@cindex identifying terminals + +The functions described in this chapter only work on files that +correspond to terminal devices. You can find out whether a file +descriptor is associated with a terminal by using the @code{isatty} +function. + +@pindex unistd.h +Prototypes for the functions in this section are declared in the header +file @file{unistd.h}. + +@comment unistd.h +@comment POSIX.1 +@deftypefun int isatty (int @var{filedes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c isatty ok +@c tcgetattr dup ok +This function returns @code{1} if @var{filedes} is a file descriptor +associated with an open terminal device, and @math{0} otherwise. +@end deftypefun + +If a file descriptor is associated with a terminal, you can get its +associated file name using the @code{ttyname} function. See also the +@code{ctermid} function, described in @ref{Identifying the Terminal}. 
+ +@comment unistd.h +@comment POSIX.1 +@deftypefun {char *} ttyname (int @var{filedes}) +@safety{@prelim{}@mtunsafe{@mtasurace{:ttyname}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c ttyname @mtasurace:ttyname @ascuheap @asulock @aculock @acsmem @acsfd +@c isatty dup ok +@c fstat dup ok +@c memcpy dup ok +@c getttyname @mtasurace:ttyname @ascuheap @asulock @aculock @acsmem @acsfd +@c opendir @ascuheap @acsmem @acsfd +@c readdir ok [protected by exclusive access] +@c strcmp dup ok +@c free dup @asulock @aculock @acsfd @acsmem +@c malloc dup @asulock @aculock @acsfd @acsmem +@c closedir @ascuheap @acsmem @acsfd +@c mempcpy dup ok +@c stat dup ok +If the file descriptor @var{filedes} is associated with a terminal +device, the @code{ttyname} function returns a pointer to a +statically-allocated, null-terminated string containing the file name of +the terminal file. The value is a null pointer if the file descriptor +isn't associated with a terminal, or the file name cannot be determined. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int ttyname_r (int @var{filedes}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{}}@acunsafe{@acsmem{} @acsfd{}}} +@c ttyname_r @ascuheap @acsmem @acsfd +@c isatty dup ok +@c fstat dup ok +@c memcpy dup ok +@c getttyname_r @ascuheap @acsmem @acsfd +@c opendir @ascuheap @acsmem @acsfd +@c readdir ok [protected by exclusive access] +@c strcmp dup ok +@c closedir @ascuheap @acsmem @acsfd +@c stpncpy dup ok +@c stat dup ok +The @code{ttyname_r} function is similar to the @code{ttyname} function +except that it places its result into the user-specified buffer starting +at @var{buf} with length @var{len}. + +The normal return value from @code{ttyname_r} is @math{0}. Otherwise an +error number is returned to indicate the error. 
The following +@code{errno} error conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item ENOTTY +The @var{filedes} is not associated with a terminal. + +@item ERANGE +The buffer length @var{len} is too small to store the string to be +returned. +@end table +@end deftypefun + +@node I/O Queues +@section I/O Queues + +Many of the remaining functions in this section refer to the input and +output queues of a terminal device. These queues implement a form of +buffering @emph{within the kernel} independent of the buffering +implemented by I/O streams (@pxref{I/O on Streams}). + +@cindex terminal input queue +@cindex typeahead buffer +The @dfn{terminal input queue} is also sometimes referred to as its +@dfn{typeahead buffer}. It holds the characters that have been received +from the terminal but not yet read by any process. + +The size of the input queue is described by the @code{MAX_INPUT} and +@w{@code{_POSIX_MAX_INPUT}} parameters; see @ref{Limits for Files}. You +are guaranteed a queue size of at least @code{MAX_INPUT}, but the queue +might be larger, and might even dynamically change size. If input flow +control is enabled by setting the @code{IXOFF} input mode bit +(@pxref{Input Modes}), the terminal driver transmits STOP and START +characters to the terminal when necessary to prevent the queue from +overflowing. Otherwise, input may be lost if it comes in too fast from +the terminal. In canonical mode, all input stays in the queue until a +newline character is received, so the terminal input queue can fill up +when you type a very long line. @xref{Canonical or Not}. + +@cindex terminal output queue +The @dfn{terminal output queue} is like the input queue, but for output; +it contains characters that have been written by processes, but not yet +transmitted to the terminal. 
If output flow control is enabled by +setting the @code{IXON} input mode bit (@pxref{Input Modes}), the +terminal driver obeys START and STOP characters sent by the terminal to +stop and restart transmission of output. + +@dfn{Clearing} the terminal input queue means discarding any characters +that have been received but not yet read. Similarly, clearing the +terminal output queue means discarding any characters that have been +written but not yet transmitted. + +@node Canonical or Not +@section Two Styles of Input: Canonical or Not + +POSIX systems support two basic modes of input: canonical and +noncanonical. + +@cindex canonical input processing +In @dfn{canonical input processing} mode, terminal input is processed in +lines terminated by newline (@code{'\n'}), EOF, or EOL characters. No +input can be read until an entire line has been typed by the user, and +the @code{read} function (@pxref{I/O Primitives}) returns at most a +single line of input, no matter how many bytes are requested. + +In canonical input mode, the operating system provides input editing +facilities: some characters are interpreted specially to perform editing +operations within the current line of text, such as ERASE and KILL. +@xref{Editing Characters}. + +The constants @code{_POSIX_MAX_CANON} and @code{MAX_CANON} parameterize +the maximum number of bytes which may appear in a single line of +canonical input. @xref{Limits for Files}. You are guaranteed a maximum +line length of at least @code{MAX_CANON} bytes, but the maximum might be +larger, and might even dynamically change size. + +@cindex noncanonical input processing +In @dfn{noncanonical input processing} mode, characters are not grouped +into lines, and ERASE and KILL processing is not performed. The +granularity with which bytes are read in noncanonical input mode is +controlled by the MIN and TIME settings. @xref{Noncanonical Input}. 
+ +Most programs use canonical input mode, because this gives the user a +way to edit input line by line. The usual reason to use noncanonical +mode is when the program accepts single-character commands or provides +its own editing facilities. + +The choice of canonical or noncanonical input is controlled by the +@code{ICANON} flag in the @code{c_lflag} member of @code{struct termios}. +@xref{Local Modes}. + +@node Terminal Modes +@section Terminal Modes + +@pindex termios.h +This section describes the various terminal attributes that control how +input and output are done. The functions, data structures, and symbolic +constants are all declared in the header file @file{termios.h}. + +Don't confuse terminal attributes with file attributes. A device special +file which is associated with a terminal has file attributes as described +in @ref{File Attributes}. These are unrelated to the attributes of the +terminal device itself, which are discussed in this section. + +@menu +* Mode Data Types:: The data type @code{struct termios} and + related types. +* Mode Functions:: Functions to read and set the terminal + attributes. +* Setting Modes:: The right way to set terminal attributes + reliably. +* Input Modes:: Flags controlling low-level input handling. +* Output Modes:: Flags controlling low-level output handling. +* Control Modes:: Flags controlling serial port behavior. +* Local Modes:: Flags controlling high-level input handling. +* Line Speed:: How to read and set the terminal line speed. +* Special Characters:: Characters that have special effects, + and how to change them. +* Noncanonical Input:: Controlling how long to wait for input. +@end menu + +@node Mode Data Types +@subsection Terminal Mode Data Types +@cindex terminal mode data types + +The entire collection of attributes of a terminal is stored in a +structure of type @code{struct termios}. This structure is used +with the functions @code{tcgetattr} and @code{tcsetattr} to read +and set the attributes. 
+ +@comment termios.h +@comment POSIX.1 +@deftp {Data Type} {struct termios} +A @code{struct termios} records all the I/O attributes of a terminal. The +structure includes at least the following members: + +@table @code +@item tcflag_t c_iflag +A bit mask specifying flags for input modes; see @ref{Input Modes}. + +@item tcflag_t c_oflag +A bit mask specifying flags for output modes; see @ref{Output Modes}. + +@item tcflag_t c_cflag +A bit mask specifying flags for control modes; see @ref{Control Modes}. + +@item tcflag_t c_lflag +A bit mask specifying flags for local modes; see @ref{Local Modes}. + +@item cc_t c_cc[NCCS] +An array specifying which characters are associated with various +control functions; see @ref{Special Characters}. +@end table + +The @code{struct termios} structure also contains members which +encode input and output transmission speeds, but the representation is +not specified. @xref{Line Speed}, for how to examine and store the +speed values. +@end deftp + +The following sections describe the details of the members of the +@code{struct termios} structure. + +@comment termios.h +@comment POSIX.1 +@deftp {Data Type} tcflag_t +This is an unsigned integer type used to represent the various +bit masks for terminal flags. +@end deftp + +@comment termios.h +@comment POSIX.1 +@deftp {Data Type} cc_t +This is an unsigned integer type used to represent characters associated +with various terminal control functions. +@end deftp + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int NCCS +The value of this macro is the number of elements in the @code{c_cc} +array. 
+@end deftypevr + +@node Mode Functions +@subsection Terminal Mode Functions +@cindex terminal mode functions + +@comment termios.h +@comment POSIX.1 +@deftypefun int tcgetattr (int @var{filedes}, struct termios *@var{termios-p}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Converting the kernel-returned termios data structure to the userland +@c format does not ensure atomic or consistent writing. +This function is used to examine the attributes of the terminal +device with file descriptor @var{filedes}. The attributes are returned +in the structure that @var{termios-p} points to. + +If successful, @code{tcgetattr} returns @math{0}. A return value of @math{-1} +indicates an error. The following @code{errno} error conditions are +defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item ENOTTY +The @var{filedes} is not associated with a terminal. +@end table +@end deftypefun + +@comment termios.h +@comment POSIX.1 +@deftypefun int tcsetattr (int @var{filedes}, int @var{when}, const struct termios *@var{termios-p}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Converting the incoming termios data structure to the kernel format +@c does not ensure atomic or consistent reading. +This function sets the attributes of the terminal device with file +descriptor @var{filedes}. The new attributes are taken from the +structure that @var{termios-p} points to. + +The @var{when} argument specifies how to deal with input and output +already queued. It can be one of the following values: + +@vtable @code +@comment termios.h +@comment POSIX.1 +@item TCSANOW +Make the change immediately. + +@comment termios.h +@comment POSIX.1 +@item TCSADRAIN +Make the change after waiting until all queued output has been written. +You should usually use this option when changing parameters that affect +output. 
+ +@comment termios.h +@comment POSIX.1 +@item TCSAFLUSH +This is like @code{TCSADRAIN}, but also discards any queued input. + +@comment termios.h +@comment BSD +@item TCSASOFT +This is a flag bit that you can add to any of the above alternatives. +Its meaning is to inhibit alteration of the state of the terminal +hardware. It is a BSD extension; it is only supported on BSD systems +and @gnuhurdsystems{}. + +Using @code{TCSASOFT} is exactly the same as setting the @code{CIGNORE} +bit in the @code{c_cflag} member of the structure @var{termios-p} points +to. @xref{Control Modes}, for a description of @code{CIGNORE}. +@end vtable + +If this function is called from a background process on its controlling +terminal, normally all processes in the process group are sent a +@code{SIGTTOU} signal, in the same way as if the process were trying to +write to the terminal. The exception is if the calling process itself +is ignoring or blocking @code{SIGTTOU} signals, in which case the +operation is performed and no signal is sent. @xref{Job Control}. + +If successful, @code{tcsetattr} returns @math{0}. A return value of +@math{-1} indicates an error. The following @code{errno} error +conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item ENOTTY +The @var{filedes} is not associated with a terminal. + +@item EINVAL +Either the value of the @code{when} argument is not valid, or there is +something wrong with the data in the @var{termios-p} argument. +@end table +@end deftypefun + +Although @code{tcgetattr} and @code{tcsetattr} specify the terminal +device with a file descriptor, the attributes are those of the terminal +device itself and not of the file descriptor. 
This means that the +effects of changing terminal attributes are persistent; if another +process opens the terminal file later on, it will see the changed +attributes even though it doesn't have anything to do with the open file +descriptor you originally specified in changing the attributes. + +Similarly, if a single process has multiple or duplicated file +descriptors for the same terminal device, changing the terminal +attributes affects input and output to all of these file +descriptors. This means, for example, that you can't open one file +descriptor or stream to read from a terminal in the normal +line-buffered, echoed mode; and simultaneously have another file +descriptor for the same terminal that you use to read from it in +single-character, non-echoed mode. Instead, you have to explicitly +switch the terminal back and forth between the two modes. + +@node Setting Modes +@subsection Setting Terminal Modes Properly + +When you set terminal modes, you should call @code{tcgetattr} first to +get the current modes of the particular terminal device, modify only +those modes that you are really interested in, and store the result with +@code{tcsetattr}. + +It's a bad idea to simply initialize a @code{struct termios} structure +to a chosen set of attributes and pass it directly to @code{tcsetattr}. +Your program may be run years from now, on systems that support members +not documented in this manual. The way to avoid setting these members +to unreasonable values is to avoid changing them. + +What's more, different terminal devices may require different mode +settings in order to function properly. So you should avoid blindly +copying attributes from one terminal device to another. + +When a member contains a collection of independent flags, as the +@code{c_iflag}, @code{c_oflag} and @code{c_cflag} members do, even +setting the entire member is a bad idea, because particular operating +systems have their own flags. 
Instead, you should start with the +current value of the member and alter only the flags whose values matter +in your program, leaving any other flags unchanged. + +Here is an example of how to set one flag (@code{ISTRIP}) in the +@code{struct termios} structure while properly preserving all the other +data in the structure: + +@smallexample +@group +int +set_istrip (int desc, int value) +@{ + struct termios settings; + int result; +@end group + +@group + result = tcgetattr (desc, &settings); + if (result < 0) + @{ + perror ("error in tcgetattr"); + return 0; + @} +@end group +@group + settings.c_iflag &= ~ISTRIP; + if (value) + settings.c_iflag |= ISTRIP; +@end group +@group + result = tcsetattr (desc, TCSANOW, &settings); + if (result < 0) + @{ + perror ("error in tcsetattr"); + return 0; + @} + return 1; +@} +@end group +@end smallexample + +@node Input Modes +@subsection Input Modes + +This section describes the terminal attribute flags that control +fairly low-level aspects of input processing: handling of parity errors, +break signals, flow control, and @key{RET} and @key{LFD} characters. + +All of these flags are bits in the @code{c_iflag} member of the +@code{struct termios} structure. The member is an integer, and you +change flags using the operators @code{&}, @code{|} and @code{^}. Don't +try to specify the entire value for @code{c_iflag}---instead, change +only specific flags and leave the rest untouched (@pxref{Setting +Modes}). + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t INPCK +@cindex parity checking +If this bit is set, input parity checking is enabled. If it is not set, +no checking at all is done for parity errors on input; the +characters are simply passed through to the application. + +Parity checking on input processing is independent of whether parity +detection and generation on the underlying terminal hardware is enabled; +see @ref{Control Modes}. 
For example, you could clear the @code{INPCK} +input mode flag and set the @code{PARENB} control mode flag to ignore +parity errors on input, but still generate parity on output. + +If this bit is set, what happens when a parity error is detected depends +on whether the @code{IGNPAR} or @code{PARMRK} bits are set. If neither +of these bits is set, a byte with a parity error is passed to the +application as a @code{'\0'} character. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t IGNPAR +If this bit is set, any byte with a framing or parity error is ignored. +This is only useful if @code{INPCK} is also set. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t PARMRK +If this bit is set, input bytes with parity or framing errors are marked +when passed to the program. This bit is meaningful only when +@code{INPCK} is set and @code{IGNPAR} is not set. + +The way erroneous bytes are marked is with two preceding bytes, +@code{0377} and @code{0}. Thus, the program actually reads three bytes +for one erroneous byte received from the terminal. + +If a valid byte has the value @code{0377}, and @code{ISTRIP} (see below) +is not set, the program might confuse it with the prefix that marks a +parity error. So a valid byte @code{0377} is passed to the program as +two bytes, @code{0377} @code{0377}, in this case. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ISTRIP +If this bit is set, valid input bytes are stripped to seven bits; +otherwise, all eight bits are available for programs to read. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t IGNBRK +If this bit is set, break conditions are ignored. + +@cindex break condition, detecting +A @dfn{break condition} is defined in the context of asynchronous +serial data transmission as a series of zero-value bits longer than a +single byte.
+@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t BRKINT +If this bit is set and @code{IGNBRK} is not set, a break condition +clears the terminal input and output queues and raises a @code{SIGINT} +signal for the foreground process group associated with the terminal. + +If neither @code{BRKINT} nor @code{IGNBRK} is set, a break condition is +passed to the application as a single @code{'\0'} character if +@code{PARMRK} is not set, or otherwise as a three-character sequence +@code{'\377'}, @code{'\0'}, @code{'\0'}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t IGNCR +If this bit is set, carriage return characters (@code{'\r'}) are +discarded on input. Discarding carriage return may be useful on +terminals that send both carriage return and linefeed when you type the +@key{RET} key. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ICRNL +If this bit is set and @code{IGNCR} is not set, carriage return characters +(@code{'\r'}) received as input are passed to the application as newline +characters (@code{'\n'}). +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t INLCR +If this bit is set, newline characters (@code{'\n'}) received as input +are passed to the application as carriage return characters (@code{'\r'}). +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t IXOFF +If this bit is set, start/stop control on input is enabled. In other +words, the computer sends STOP and START characters as necessary to +prevent input from coming in faster than programs are reading it. The +idea is that the actual terminal hardware that is generating the input +data responds to a STOP character by suspending transmission, and to a +START character by resuming transmission. @xref{Start/Stop Characters}.
+@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t IXON +If this bit is set, start/stop control on output is enabled. In other +words, if the computer receives a STOP character, it suspends output +until a START character is received. In this case, the STOP and START +characters are never passed to the application program. If this bit is +not set, then START and STOP can be read as ordinary characters. +@xref{Start/Stop Characters}. +@c !!! mention this interferes with using C-s and C-q for programs like emacs +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t IXANY +If this bit is set, any input character restarts output when output has +been suspended with the STOP character. Otherwise, only the START +character restarts output. + +This is a BSD extension; it exists only on BSD systems and +@gnulinuxhurdsystems{}. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t IMAXBEL +If this bit is set, then filling up the terminal input buffer sends a +BEL character (code @code{007}) to the terminal to ring the bell. + +This is a BSD extension. +@end deftypevr + +@node Output Modes +@subsection Output Modes + +This section describes the terminal flags and fields that control how +output characters are translated and padded for display. All of these +are contained in the @code{c_oflag} member of the @w{@code{struct termios}} +structure. + +The @code{c_oflag} member itself is an integer, and you change the flags +and fields using the operators @code{&}, @code{|}, and @code{^}. Don't +try to specify the entire value for @code{c_oflag}---instead, change +only specific flags and leave the rest untouched (@pxref{Setting +Modes}). + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t OPOST +If this bit is set, output data is processed in some unspecified way so +that it is displayed appropriately on the terminal device. 
This +typically includes mapping newline characters (@code{'\n'}) onto +carriage return and linefeed pairs. + +If this bit isn't set, the characters are transmitted as-is. +@end deftypevr + +The following three bits are effective only if @code{OPOST} is set. + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ONLCR +If this bit is set, convert the newline character on output into a pair +of characters, carriage return followed by linefeed. +@end deftypevr + +@comment termios.h (optional) +@comment BSD +@deftypevr Macro tcflag_t OXTABS +If this bit is set, convert tab characters on output into the appropriate +number of spaces to emulate a tab stop every eight columns. This bit +exists only on BSD systems and @gnuhurdsystems{}; on +@gnulinuxsystems{} it is available as @code{XTABS}. +@end deftypevr + +@comment termios.h (optional) +@comment BSD +@deftypevr Macro tcflag_t ONOEOT +If this bit is set, discard @kbd{C-d} characters (code @code{004}) on +output. These characters cause many dial-up terminals to disconnect. +This bit exists only on BSD systems and @gnuhurdsystems{}. +@end deftypevr + +@node Control Modes +@subsection Control Modes + +This section describes the terminal flags and fields that control +parameters usually associated with asynchronous serial data +transmission. These flags may not make sense for other kinds of +terminal ports (such as a network connection pseudo-terminal). All of +these are contained in the @code{c_cflag} member of the @code{struct +termios} structure. + +The @code{c_cflag} member itself is an integer, and you change the flags +and fields using the operators @code{&}, @code{|}, and @code{^}. Don't +try to specify the entire value for @code{c_cflag}---instead, change +only specific flags and leave the rest untouched (@pxref{Setting +Modes}). 
+ +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CLOCAL +If this bit is set, it indicates that the terminal is connected +``locally'' and that the modem status lines (such as carrier detect) +should be ignored. +@cindex modem status lines +@cindex carrier detect + +On many systems if this bit is not set and you call @code{open} without +the @code{O_NONBLOCK} flag set, @code{open} blocks until a modem +connection is established. + +If this bit is not set and a modem disconnect is detected, a +@code{SIGHUP} signal is sent to the controlling process group for the +terminal (if it has one). Normally, this causes the process to exit; +see @ref{Signal Handling}. Reading from the terminal after a disconnect +causes an end-of-file condition, and writing causes an @code{EIO} error +to be returned. The terminal device must be closed and reopened to +clear the condition. +@cindex modem disconnect +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t HUPCL +If this bit is set, a modem disconnect is generated when all processes +that have the terminal device open have either closed the file or exited. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CREAD +If this bit is set, input can be read from the terminal. Otherwise, +input is discarded when it arrives. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CSTOPB +If this bit is set, two stop bits are used. Otherwise, only one stop bit +is used. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t PARENB +If this bit is set, generation and detection of a parity bit are enabled. +@xref{Input Modes}, for information on how input parity errors are handled. + +If this bit is not set, no parity bit is added to output characters, and +input characters are not checked for correct parity. 
+@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t PARODD +This bit is only useful if @code{PARENB} is set. If @code{PARODD} is set, +odd parity is used, otherwise even parity is used. +@end deftypevr + +The control mode flags also include a field for the number of bits per +character. You can use the @code{CSIZE} macro as a mask to extract the +value, like this: @code{settings.c_cflag & CSIZE}. + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CSIZE +This is a mask for the number of bits per character. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CS5 +This specifies five bits per byte. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CS6 +This specifies six bits per byte. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CS7 +This specifies seven bits per byte. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t CS8 +This specifies eight bits per byte. +@end deftypevr + +The following four bits are BSD extensions; these exist only on BSD +systems and @gnuhurdsystems{}. + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t CCTS_OFLOW +If this bit is set, enable flow control of output based on the CTS wire +(RS232 protocol). +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t CRTS_IFLOW +If this bit is set, enable flow control of input based on the RTS wire +(RS232 protocol). +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t MDMBUF +If this bit is set, enable carrier-based flow control of output. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t CIGNORE +If this bit is set, it says to ignore the control modes and line speed +values entirely. This is only meaningful in a call to @code{tcsetattr}.
+ +The @code{c_cflag} member and the line speed values returned by +@code{cfgetispeed} and @code{cfgetospeed} will be unaffected by the +call. @code{CIGNORE} is useful if you want to set all the software +modes in the other members, but leave the hardware details in +@code{c_cflag} unchanged. (This is how the @code{TCSASOFT} flag to +@code{tcsetattr} works.) + +This bit is never set in the structure filled in by @code{tcgetattr}. +@end deftypevr + +@node Local Modes +@subsection Local Modes + +This section describes the flags for the @code{c_lflag} member of the +@code{struct termios} structure. These flags generally control +higher-level aspects of input processing than the input modes flags +described in @ref{Input Modes}, such as echoing, signals, and the choice +of canonical or noncanonical input. + +The @code{c_lflag} member itself is an integer, and you change the flags +and fields using the operators @code{&}, @code{|}, and @code{^}. Don't +try to specify the entire value for @code{c_lflag}---instead, change +only specific flags and leave the rest untouched (@pxref{Setting +Modes}). + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ICANON +This bit, if set, enables canonical input processing mode. Otherwise, +input is processed in noncanonical mode. @xref{Canonical or Not}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ECHO +If this bit is set, echoing of input characters back to the terminal +is enabled. +@cindex echo of terminal input +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ECHOE +If this bit is set, echoing indicates erasure of input with the ERASE +character by erasing the last character in the current line from the +screen. Otherwise, the character erased is re-echoed to show what has +happened (suitable for a printing terminal).
+ +This bit only controls the display behavior; the @code{ICANON} bit by +itself controls actual recognition of the ERASE character and erasure of +input, without which @code{ECHOE} is simply irrelevant. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t ECHOPRT +This bit, like @code{ECHOE}, enables display of the ERASE character in +a way that is geared to a hardcopy terminal. When you type the ERASE +character, a @samp{\} character is printed followed by the first +character erased. Typing the ERASE character again just prints the next +character erased. Then, the next time you type a normal character, a +@samp{/} character is printed before the character echoes. + +This is a BSD extension, and exists only in BSD systems and +@gnulinuxhurdsystems{}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ECHOK +This bit enables special display of the KILL character by moving to a +new line after echoing the KILL character normally. The behavior of +@code{ECHOKE} (below) is nicer to look at. + +If this bit is not set, the KILL character echoes just as it would if it +were not the KILL character. Then it is up to the user to remember that +the KILL character has erased the preceding input; there is no +indication of this on the screen. + +This bit only controls the display behavior; the @code{ICANON} bit by +itself controls actual recognition of the KILL character and erasure of +input, without which @code{ECHOK} is simply irrelevant. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t ECHOKE +This bit is similar to @code{ECHOK}. It enables special display of the +KILL character by erasing on the screen the entire line that has been +killed. This is a BSD extension, and exists only in BSD systems and +@gnulinuxhurdsystems{}. 
+@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ECHONL +If this bit is set and the @code{ICANON} bit is also set, then the +newline (@code{'\n'}) character is echoed even if the @code{ECHO} bit +is not set. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t ECHOCTL +If this bit is set and the @code{ECHO} bit is also set, echo control +characters with @samp{^} followed by the corresponding text character. +Thus, control-A echoes as @samp{^A}. This is usually the preferred mode +for interactive input, because echoing a control character back to the +terminal could have some undesired effect on the terminal. + +This is a BSD extension, and exists only in BSD systems and +@gnulinuxhurdsystems{}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t ISIG +This bit controls whether the INTR, QUIT, and SUSP characters are +recognized. The functions associated with these characters are performed +if and only if this bit is set. Being in canonical or noncanonical +input mode has no effect on the interpretation of these characters. + +You should use caution when disabling recognition of these characters. +Programs that cannot be interrupted interactively are very +user-unfriendly. If you clear this bit, your program should provide +some alternate interface that allows the user to interactively send the +signals associated with these characters, or to escape from the program. +@cindex interactive signals, from terminal + +@xref{Signal Characters}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t IEXTEN +POSIX.1 gives @code{IEXTEN} implementation-defined meaning, +so you cannot rely on this interpretation on all systems. + +On BSD systems and @gnulinuxhurdsystems{}, it enables the LNEXT and +DISCARD characters. +@xref{Other Special}. 
+@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t NOFLSH +Normally, the INTR, QUIT, and SUSP characters cause input and output +queues for the terminal to be cleared. If this bit is set, the queues +are not cleared. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro tcflag_t TOSTOP +If this bit is set and the system supports job control, then +@code{SIGTTOU} signals are generated by background processes that +attempt to write to the terminal. @xref{Access to the Terminal}. +@end deftypevr + +The following bits are BSD extensions; they exist only on BSD systems +and @gnuhurdsystems{}. + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t ALTWERASE +This bit determines how far the WERASE character should erase. The +WERASE character erases back to the beginning of a word; the question +is, where do words begin? + +If this bit is clear, then the beginning of a word is a nonwhitespace +character following a whitespace character. If the bit is set, then the +beginning of a word is an alphanumeric character or underscore following +a character which is none of those. + +@xref{Editing Characters}, for more information about the WERASE character. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t FLUSHO +This is the bit that toggles when the user types the DISCARD character. +While this bit is set, all output is discarded. @xref{Other Special}. +@end deftypevr + +@comment termios.h (optional) +@comment BSD +@deftypevr Macro tcflag_t NOKERNINFO +Setting this bit disables handling of the STATUS character. +@xref{Other Special}. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro tcflag_t PENDIN +If this bit is set, it indicates that there is a line of input that +needs to be reprinted. Typing the REPRINT character sets this bit; the +bit remains set until reprinting is finished. @xref{Editing Characters}. +@end deftypevr + +@c EXTPROC is too obscure to document now. 
--roland + +@node Line Speed +@subsection Line Speed +@cindex line speed +@cindex baud rate +@cindex terminal line speed +@cindex terminal line speed + +The terminal line speed tells the computer how fast to read and write +data on the terminal. + +If the terminal is connected to a real serial line, the terminal speed +you specify actually controls the line---if it doesn't match the +terminal's own idea of the speed, communication does not work. Real +serial ports accept only certain standard speeds. Also, particular +hardware may not support even all the standard speeds. Specifying a +speed of zero hangs up a dialup connection and turns off modem control +signals. + +If the terminal is not a real serial line (for example, if it is a +network connection), then the line speed won't really affect data +transmission speed, but some programs will use it to determine the +amount of padding needed. It's best to specify a line speed value that +matches the actual speed of the actual terminal, but you can safely +experiment with different values to vary the amount of padding. + +There are actually two line speeds for each terminal, one for input and +one for output. You can set them independently, but most often +terminals use the same speed for both directions. + +The speed values are stored in the @code{struct termios} structure, but +don't try to access them in the @code{struct termios} structure +directly. Instead, you should use the following functions to read and +store them: + +@comment termios.h +@comment POSIX.1 +@deftypefun speed_t cfgetospeed (const struct termios *@var{termios-p}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct access to a single termios field, except on Linux, where +@c multiple accesses may take place. No worries either way, callers +@c must ensure mutual exclusion on such non-opaque types. +This function returns the output line speed stored in the structure +@code{*@var{termios-p}}. 
+@end deftypefun + +@comment termios.h +@comment POSIX.1 +@deftypefun speed_t cfgetispeed (const struct termios *@var{termios-p}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function returns the input line speed stored in the structure +@code{*@var{termios-p}}. +@end deftypefun + +@comment termios.h +@comment POSIX.1 +@deftypefun int cfsetospeed (struct termios *@var{termios-p}, speed_t @var{speed}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function stores @var{speed} in @code{*@var{termios-p}} as the output +speed. The normal return value is @math{0}; a value of @math{-1} +indicates an error. If @var{speed} is not a speed, @code{cfsetospeed} +returns @math{-1}. +@end deftypefun + +@comment termios.h +@comment POSIX.1 +@deftypefun int cfsetispeed (struct termios *@var{termios-p}, speed_t @var{speed}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +This function stores @var{speed} in @code{*@var{termios-p}} as the input +speed. The normal return value is @math{0}; a value of @math{-1} +indicates an error. If @var{speed} is not a speed, @code{cfsetispeed} +returns @math{-1}. +@end deftypefun + +@comment termios.h +@comment BSD +@deftypefun int cfsetspeed (struct termios *@var{termios-p}, speed_t @var{speed}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c There's no guarantee that the two calls are atomic, but since this is +@c not an opaque type, callers ought to ensure mutual exclusion to the +@c termios object. + +@c cfsetspeed ok +@c cfsetispeed ok +@c cfsetospeed ok +This function stores @var{speed} in @code{*@var{termios-p}} as both the +input and output speeds. The normal return value is @math{0}; a value +of @math{-1} indicates an error. If @var{speed} is not a speed, +@code{cfsetspeed} returns @math{-1}. This function is an extension in +4.4 BSD. +@end deftypefun + +@comment termios.h +@comment POSIX.1 +@deftp {Data Type} speed_t +The @code{speed_t} type is an unsigned integer data type used to +represent line speeds. 
+@end deftp + +The functions @code{cfsetospeed} and @code{cfsetispeed} report errors +only for speed values that the system simply cannot handle. If you +specify a speed value that is basically acceptable, then those functions +will succeed. But they do not check that a particular hardware device +can actually support the specified speeds---in fact, they don't know +which device you plan to set the speed for. If you use @code{tcsetattr} +to set the speed of a particular device to a value that it cannot +handle, @code{tcsetattr} returns @math{-1}. + +@strong{Portability note:} In @theglibc{}, the functions above +accept speeds measured in bits per second as input, and return speed +values measured in bits per second. Other libraries require speeds to +be indicated by special codes. For POSIX.1 portability, you must use +one of the following symbols to represent the speed; their precise +numeric values are system-dependent, but each name has a fixed meaning: +@code{B110} stands for 110 bps, @code{B300} for 300 bps, and so on. +There is no portable way to represent any speed but these, but these are +the only speeds that typical serial lines can support. 
+ +@comment termios.h +@comment POSIX.1 +@vindex B0 +@comment termios.h +@comment POSIX.1 +@vindex B50 +@comment termios.h +@comment POSIX.1 +@vindex B75 +@comment termios.h +@comment POSIX.1 +@vindex B110 +@comment termios.h +@comment POSIX.1 +@vindex B134 +@comment termios.h +@comment POSIX.1 +@vindex B150 +@comment termios.h +@comment POSIX.1 +@vindex B200 +@comment termios.h +@comment POSIX.1 +@vindex B300 +@comment termios.h +@comment POSIX.1 +@vindex B600 +@comment termios.h +@comment POSIX.1 +@vindex B1200 +@comment termios.h +@comment POSIX.1 +@vindex B1800 +@comment termios.h +@comment POSIX.1 +@vindex B2400 +@comment termios.h +@comment POSIX.1 +@vindex B4800 +@comment termios.h +@comment POSIX.1 +@vindex B9600 +@comment termios.h +@comment POSIX.1 +@vindex B19200 +@comment termios.h +@comment POSIX.1 +@vindex B38400 +@comment termios.h +@comment GNU +@vindex B57600 +@comment termios.h +@comment GNU +@vindex B115200 +@comment termios.h +@comment GNU +@vindex B230400 +@comment termios.h +@comment GNU +@vindex B460800 +@smallexample +B0 B50 B75 B110 B134 B150 B200 +B300 B600 B1200 B1800 B2400 B4800 +B9600 B19200 B38400 B57600 B115200 +B230400 B460800 +@end smallexample + +@vindex EXTA +@vindex EXTB +BSD defines two additional speed symbols as aliases: @code{EXTA} is an +alias for @code{B19200} and @code{EXTB} is an alias for @code{B38400}. +These aliases are obsolete. + +@node Special Characters +@subsection Special Characters + +In canonical input, the terminal driver recognizes a number of special +characters which perform various control functions. These include the +ERASE character (usually @key{DEL}) for editing input, and other editing +characters. The INTR character (normally @kbd{C-c}) for sending a +@code{SIGINT} signal, and other signal-raising characters, may be +available in either canonical or noncanonical input mode. All these +characters are described in this section. 
+ +The particular characters used are specified in the @code{c_cc} member +of the @code{struct termios} structure. This member is an array; each +element specifies the character for a particular role. Each element has +a symbolic constant that stands for the index of that element---for +example, @code{VINTR} is the index of the element that specifies the INTR +character, so storing @code{'='} in @code{@var{termios}.c_cc[VINTR]} +specifies @samp{=} as the INTR character. + +@vindex _POSIX_VDISABLE +On some systems, you can disable a particular special character function +by specifying the value @code{_POSIX_VDISABLE} for that role. This +value is unequal to any possible character code. @xref{Options for +Files}, for more information about how to tell whether the operating +system you are using supports @code{_POSIX_VDISABLE}. + +@menu +* Editing Characters:: Special characters that terminate lines and + delete text, and other editing functions. +* Signal Characters:: Special characters that send or raise signals + to or for certain classes of processes. +* Start/Stop Characters:: Special characters that suspend or resume + suspended output. +* Other Special:: Other special characters for BSD systems: + they can discard output, and print status. +@end menu + +@node Editing Characters +@subsubsection Characters for Input Editing + +These special characters are active only in canonical input mode. +@xref{Canonical or Not}. + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VEOF +@cindex EOF character +This is the subscript for the EOF character in the special control +character array. @code{@var{termios}.c_cc[VEOF]} holds the character +itself. + +The EOF character is recognized only in canonical input mode. It acts +as a line terminator in the same way as a newline character, but if the +EOF character is typed at the beginning of a line it causes @code{read} +to return a byte count of zero, indicating end-of-file. The EOF +character itself is discarded. 
+ +Usually, the EOF character is @kbd{C-d}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VEOL +@cindex EOL character +This is the subscript for the EOL character in the special control +character array. @code{@var{termios}.c_cc[VEOL]} holds the character +itself. + +The EOL character is recognized only in canonical input mode. It acts +as a line terminator, just like a newline character. The EOL character +is not discarded; it is read as the last character in the input line. + +@c !!! example: this is set to ESC by 4.3 csh with "set filec" so it can +@c complete partial lines without using cbreak or raw mode. + +You don't need to use the EOL character to make @key{RET} end a line. +Just set the ICRNL flag. In fact, this is the default state of +affairs. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro int VEOL2 +@cindex EOL2 character +This is the subscript for the EOL2 character in the special control +character array. @code{@var{termios}.c_cc[VEOL2]} holds the character +itself. + +The EOL2 character works just like the EOL character (see above), but it +can be a different character. Thus, you can specify two characters to +terminate an input line, by setting EOL to one of them and EOL2 to the +other. + +The EOL2 character is a BSD extension; it exists only on BSD systems +and @gnulinuxhurdsystems{}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VERASE +@cindex ERASE character +This is the subscript for the ERASE character in the special control +character array. @code{@var{termios}.c_cc[VERASE]} holds the +character itself. + +The ERASE character is recognized only in canonical input mode. When +the user types the erase character, the previous character typed is +discarded. (If the terminal generates multibyte character sequences, +this may cause more than one byte of input to be discarded.) This +cannot be used to erase past the beginning of the current line of text. 
+The ERASE character itself is discarded. +@c !!! mention ECHOE here + +Usually, the ERASE character is @key{DEL}. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro int VWERASE +@cindex WERASE character +This is the subscript for the WERASE character in the special control +character array. @code{@var{termios}.c_cc[VWERASE]} holds the character +itself. + +The WERASE character is recognized only in canonical mode. It erases an +entire word of prior input, and any whitespace after it; whitespace +characters before the word are not erased. + +The definition of a ``word'' depends on the setting of the +@code{ALTWERASE} mode; @pxref{Local Modes}. + +If the @code{ALTWERASE} mode is not set, a word is defined as a sequence +of any characters except space or tab. + +If the @code{ALTWERASE} mode is set, a word is defined as a sequence of +characters containing only letters, numbers, and underscores, optionally +followed by one character that is not a letter, number, or underscore. + +The WERASE character is usually @kbd{C-w}. + +This is a BSD extension. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VKILL +@cindex KILL character +This is the subscript for the KILL character in the special control +character array. @code{@var{termios}.c_cc[VKILL]} holds the character +itself. + +The KILL character is recognized only in canonical input mode. When the +user types the kill character, the entire contents of the current line +of input are discarded. The kill character itself is discarded too. + +The KILL character is usually @kbd{C-u}. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro int VREPRINT +@cindex REPRINT character +This is the subscript for the REPRINT character in the special control +character array. @code{@var{termios}.c_cc[VREPRINT]} holds the character +itself. + +The REPRINT character is recognized only in canonical mode. It reprints +the current input line. 
If some asynchronous output has come while you +are typing, this lets you see the line you are typing clearly again. + +The REPRINT character is usually @kbd{C-r}. + +This is a BSD extension. +@end deftypevr + +@node Signal Characters +@subsubsection Characters that Cause Signals + +These special characters may be active in either canonical or noncanonical +input mode, but only when the @code{ISIG} flag is set (@pxref{Local +Modes}). + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VINTR +@cindex INTR character +@cindex interrupt character +This is the subscript for the INTR character in the special control +character array. @code{@var{termios}.c_cc[VINTR]} holds the character +itself. + +The INTR (interrupt) character raises a @code{SIGINT} signal for all +processes in the foreground job associated with the terminal. The INTR +character itself is then discarded. @xref{Signal Handling}, for more +information about signals. + +Typically, the INTR character is @kbd{C-c}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VQUIT +@cindex QUIT character +This is the subscript for the QUIT character in the special control +character array. @code{@var{termios}.c_cc[VQUIT]} holds the character +itself. + +The QUIT character raises a @code{SIGQUIT} signal for all processes in +the foreground job associated with the terminal. The QUIT character +itself is then discarded. @xref{Signal Handling}, for more information +about signals. + +Typically, the QUIT character is @kbd{C-\}. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VSUSP +@cindex SUSP character +@cindex suspend character +This is the subscript for the SUSP character in the special control +character array. @code{@var{termios}.c_cc[VSUSP]} holds the character +itself. + +The SUSP (suspend) character is recognized only if the implementation +supports job control (@pxref{Job Control}). 
It causes a @code{SIGTSTP} +signal to be sent to all processes in the foreground job associated with +the terminal. The SUSP character itself is then discarded. +@xref{Signal Handling}, for more information about signals. + +Typically, the SUSP character is @kbd{C-z}. +@end deftypevr + +Few applications disable the normal interpretation of the SUSP +character. If your program does this, it should provide some other +mechanism for the user to stop the job. When the user invokes this +mechanism, the program should send a @code{SIGTSTP} signal to the +process group of the process, not just to the process itself. +@xref{Signaling Another Process}. + +@comment termios.h +@comment BSD +@deftypevr Macro int VDSUSP +@cindex DSUSP character +@cindex delayed suspend character +This is the subscript for the DSUSP character in the special control +character array. @code{@var{termios}.c_cc[VDSUSP]} holds the character +itself. + +The DSUSP (delayed suspend) character is recognized only if the implementation +supports job control (@pxref{Job Control}). It sends a @code{SIGTSTP} +signal, like the SUSP character, but not right away---only when the +program tries to read it as input. Not all systems with job control +support DSUSP; only BSD-compatible systems do (including @gnuhurdsystems{}). + +@xref{Signal Handling}, for more information about signals. + +Typically, the DSUSP character is @kbd{C-y}. +@end deftypevr + +@node Start/Stop Characters +@subsubsection Special Characters for Flow Control + +These special characters may be active in either canonical or noncanonical +input mode, but their use is controlled by the flags @code{IXON} and +@code{IXOFF} (@pxref{Input Modes}). + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VSTART +@cindex START character +This is the subscript for the START character in the special control +character array. @code{@var{termios}.c_cc[VSTART]} holds the +character itself. 
+ +The START character is used to support the @code{IXON} and @code{IXOFF} +input modes. If @code{IXON} is set, receiving a START character resumes +suspended output; the START character itself is discarded. If +@code{IXANY} is set, receiving any character at all resumes suspended +output; the resuming character is not discarded unless it is the START +character. If @code{IXOFF} is set, the system may also transmit START +characters to the terminal. + +The usual value for the START character is @kbd{C-q}. You may not be +able to change this value---the hardware may insist on using @kbd{C-q} +regardless of what you specify. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VSTOP +@cindex STOP character +This is the subscript for the STOP character in the special control +character array. @code{@var{termios}.c_cc[VSTOP]} holds the character +itself. + +The STOP character is used to support the @code{IXON} and @code{IXOFF} +input modes. If @code{IXON} is set, receiving a STOP character causes +output to be suspended; the STOP character itself is discarded. If +@code{IXOFF} is set, the system may also transmit STOP characters to the +terminal, to prevent the input queue from overflowing. + +The usual value for the STOP character is @kbd{C-s}. You may not be +able to change this value---the hardware may insist on using @kbd{C-s} +regardless of what you specify. +@end deftypevr + +@node Other Special +@subsubsection Other Special Characters + +@comment termios.h +@comment BSD +@deftypevr Macro int VLNEXT +@cindex LNEXT character +This is the subscript for the LNEXT character in the special control +character array. @code{@var{termios}.c_cc[VLNEXT]} holds the character +itself. + +The LNEXT character is recognized only when @code{IEXTEN} is set, but in +both canonical and noncanonical mode. It disables any special +significance of the next character the user types. 
Even if the +character would normally perform some editing function or generate a +signal, it is read as a plain character. This is the analogue of the +@kbd{C-q} command in Emacs. ``LNEXT'' stands for ``literal next.'' + +The LNEXT character is usually @kbd{C-v}. + +This character is available on BSD systems and @gnulinuxhurdsystems{}. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro int VDISCARD +@cindex DISCARD character +This is the subscript for the DISCARD character in the special control +character array. @code{@var{termios}.c_cc[VDISCARD]} holds the character +itself. + +The DISCARD character is recognized only when @code{IEXTEN} is set, but +in both canonical and noncanonical mode. Its effect is to toggle the +discard-output flag. When this flag is set, all program output is +discarded. Setting the flag also discards all output currently in the +output buffer. Typing any other character resets the flag. + +This character is available on BSD systems and @gnulinuxhurdsystems{}. +@end deftypevr + +@comment termios.h +@comment BSD +@deftypevr Macro int VSTATUS +@cindex STATUS character +This is the subscript for the STATUS character in the special control +character array. @code{@var{termios}.c_cc[VSTATUS]} holds the character +itself. + +The STATUS character's effect is to print out a status message about how +the current process is running. + +The STATUS character is recognized only in canonical mode, and only if +@code{NOKERNINFO} is not set. + +This character is available only on BSD systems and @gnuhurdsystems{}. +@end deftypevr + +@node Noncanonical Input +@subsection Noncanonical Input + +In noncanonical input mode, the special editing characters such as +ERASE and KILL are ignored. The system facilities for the user to edit +input are disabled in noncanonical mode, so that all input characters +(unless they are special for signal or flow-control purposes) are passed +to the application program exactly as typed. 
It is up to the +application program to give the user ways to edit the input, if +appropriate. + +Noncanonical mode offers special parameters called MIN and TIME for +controlling whether and how long to wait for input to be available. You +can even use them to avoid ever waiting---to return immediately with +whatever input is available, or with no input. + +The MIN and TIME are stored in elements of the @code{c_cc} array, which +is a member of the @w{@code{struct termios}} structure. Each element of +this array has a particular role, and each element has a symbolic +constant that stands for the index of that element. @code{VMIN} and +@code{VTIME} are the names for the indices in the array of the MIN and +TIME slots. + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VMIN +@cindex MIN termios slot +This is the subscript for the MIN slot in the @code{c_cc} array. Thus, +@code{@var{termios}.c_cc[VMIN]} is the value itself. + +The MIN slot is only meaningful in noncanonical input mode; it +specifies the minimum number of bytes that must be available in the +input queue in order for @code{read} to return. +@end deftypevr + +@comment termios.h +@comment POSIX.1 +@deftypevr Macro int VTIME +@cindex TIME termios slot +This is the subscript for the TIME slot in the @code{c_cc} array. Thus, +@code{@var{termios}.c_cc[VTIME]} is the value itself. + +The TIME slot is only meaningful in noncanonical input mode; it +specifies how long to wait for input before returning, in units of 0.1 +seconds. +@end deftypevr + +The MIN and TIME values interact to determine the criterion for when +@code{read} should return; their precise meanings depend on which of +them are nonzero. There are four possible cases: + +@itemize @bullet +@item +Both TIME and MIN are nonzero. + +In this case, TIME specifies how long to wait after each input character +to see if more input arrives. 
After the first character received, +@code{read} keeps waiting until either MIN bytes have arrived in all, or +TIME elapses with no further input. + +@code{read} always blocks until the first character arrives, even if +TIME elapses first. @code{read} can return more than MIN characters if +more than MIN happen to be in the queue. + +@item +Both MIN and TIME are zero. + +In this case, @code{read} always returns immediately with as many +characters as are available in the queue, up to the number requested. +If no input is immediately available, @code{read} returns a value of +zero. + +@item +MIN is zero but TIME has a nonzero value. + +In this case, @code{read} waits for time TIME for input to become +available; the availability of a single byte is enough to satisfy the +read request and cause @code{read} to return. When it returns, it +returns as many characters as are available, up to the number requested. +If no input is available before the timer expires, @code{read} returns a +value of zero. + +@item +TIME is zero but MIN has a nonzero value. + +In this case, @code{read} waits until at least MIN bytes are available +in the queue. At that time, @code{read} returns as many characters as +are available, up to the number requested. @code{read} can return more +than MIN characters if more than MIN happen to be in the queue. +@end itemize + +What happens if MIN is 50 and you ask to read just 10 bytes? +Normally, @code{read} waits until there are 50 bytes in the buffer (or, +more generally, the wait condition described above is satisfied), and +then reads 10 of them, leaving the other 40 buffered in the operating +system for a subsequent call to @code{read}. + +@strong{Portability note:} On some systems, the MIN and TIME slots are +actually the same as the EOF and EOL slots. This causes no serious +problem because the MIN and TIME slots are used only in noncanonical +input and the EOF and EOL slots are used only in canonical input, but it +isn't very clean. 
@Theglibc{} allocates separate slots for these +uses. + +@comment termios.h +@comment BSD +@deftypefun void cfmakeraw (struct termios *@var{termios-p}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c There's no guarantee the changes are atomic, but since this is not an +@c opaque type, callers ought to ensure mutual exclusion to the termios +@c object. +This function provides an easy way to set up @code{*@var{termios-p}} for +what has traditionally been called ``raw mode'' in BSD. This uses +noncanonical input, and turns off most processing to give an unmodified +channel to the terminal. + +It does exactly this: +@smallexample + @var{termios-p}->c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP + |INLCR|IGNCR|ICRNL|IXON); + @var{termios-p}->c_oflag &= ~OPOST; + @var{termios-p}->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN); + @var{termios-p}->c_cflag &= ~(CSIZE|PARENB); + @var{termios-p}->c_cflag |= CS8; +@end smallexample +@end deftypefun + + +@node BSD Terminal Modes +@section BSD Terminal Modes +@cindex terminal modes, BSD + +The usual way to get and set terminal modes is with the functions described +in @ref{Terminal Modes}. However, on some systems you can use the +BSD-derived functions in this section to do some of the same things. On +many systems, these functions do not exist. Even with @theglibc{}, +the functions simply fail with @code{errno} = @code{ENOSYS} with many +kernels, including Linux. + +The symbols used in this section are declared in @file{sgtty.h}. + +@comment sgtty.h +@comment BSD +@deftp {Data Type} {struct sgttyb} +This structure is an input or output parameter list for @code{gtty} and +@code{stty}.
+ +@table @code +@item char sg_ispeed +Line speed for input +@item char sg_ospeed +Line speed for output +@item char sg_erase +Erase character +@item char sg_kill +Kill character +@item int sg_flags +Various flags +@end table +@end deftp + +@comment sgtty.h +@comment BSD +@deftypefun int gtty (int @var{filedes}, struct sgttyb *@var{attributes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct ioctl, BSD only. +This function gets the attributes of a terminal. + +@code{gtty} sets *@var{attributes} to describe the terminal attributes +of the terminal which is open with file descriptor @var{filedes}. +@end deftypefun + +@comment sgtty.h +@comment BSD +@deftypefun int stty (int @var{filedes}, const struct sgttyb *@var{attributes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct ioctl, BSD only. + +This function sets the attributes of a terminal. + +@code{stty} sets the terminal attributes of the terminal which is open with +file descriptor @var{filedes} to those described by *@var{attributes}. +@end deftypefun + +@node Line Control +@section Line Control Functions +@cindex terminal line control functions + +These functions perform miscellaneous control actions on terminal +devices. As regards terminal access, they are treated like doing +output: if any of these functions is used by a background process on its +controlling terminal, normally all processes in the process group are +sent a @code{SIGTTOU} signal. The exception is if the calling process +itself is ignoring or blocking @code{SIGTTOU} signals, in which case the +operation is performed and no signal is sent. @xref{Job Control}. 
+ +@cindex break condition, generating +@comment termios.h +@comment POSIX.1 +@deftypefun int tcsendbreak (int @var{filedes}, int @var{duration}) +@safety{@prelim{}@mtunsafe{@mtasurace{:tcattr(filedes)/bsd}}@asunsafe{}@acunsafe{@acucorrupt{/bsd}}} +@c On Linux, this calls just one out of two ioctls; on BSD, it's two +@c ioctls with a select (for the delay only) in between, the first +@c setting and the latter clearing the break status. The BSD +@c implementation may leave the break enabled if cancelled, and threads +@c and signals may cause the break to be interrupted before requested. +This function generates a break condition by transmitting a stream of +zero bits on the terminal associated with the file descriptor +@var{filedes}. The duration of the break is controlled by the +@var{duration} argument. If zero, the duration is between 0.25 and 0.5 +seconds. The meaning of a nonzero value depends on the operating system. + +This function does nothing if the terminal is not an asynchronous serial +data port. + +The return value is normally zero. In the event of an error, a value +of @math{-1} is returned. The following @code{errno} error conditions +are defined for this function: + +@table @code +@item EBADF +The @var{filedes} is not a valid file descriptor. + +@item ENOTTY +The @var{filedes} is not associated with a terminal device. +@end table +@end deftypefun + + +@cindex flushing terminal output queue +@cindex terminal output queue, flushing +@comment termios.h +@comment POSIX.1 +@deftypefun int tcdrain (int @var{filedes}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct ioctl. +The @code{tcdrain} function waits until all queued +output to the terminal @var{filedes} has been transmitted. + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{tcdrain} is +called. 
If the thread gets canceled these resources stay allocated +until the program ends. To avoid this calls to @code{tcdrain} should be +protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The return value is normally zero. In the event of an error, a value +of @math{-1} is returned. The following @code{errno} error conditions +are defined for this function: + +@table @code +@item EBADF +The @var{filedes} is not a valid file descriptor. + +@item ENOTTY +The @var{filedes} is not associated with a terminal device. + +@item EINTR +The operation was interrupted by delivery of a signal. +@xref{Interrupted Primitives}. +@end table +@end deftypefun + + +@cindex clearing terminal input queue +@cindex terminal input queue, clearing +@comment termios.h +@comment POSIX.1 +@deftypefun int tcflush (int @var{filedes}, int @var{queue}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Direct ioctl. +The @code{tcflush} function is used to clear the input and/or output +queues associated with the terminal file @var{filedes}. The @var{queue} +argument specifies which queue(s) to clear, and can be one of the +following values: + +@c Extra blank lines here make it look better. +@vtable @code +@item TCIFLUSH + +Clear any input data received, but not yet read. + +@item TCOFLUSH + +Clear any output data written, but not yet transmitted. + +@item TCIOFLUSH + +Clear both queued input and output. +@end vtable + +The return value is normally zero. In the event of an error, a value +of @math{-1} is returned. The following @code{errno} error conditions +are defined for this function: + +@table @code +@item EBADF +The @var{filedes} is not a valid file descriptor. + +@item ENOTTY +The @var{filedes} is not associated with a terminal device. + +@item EINVAL +A bad value was supplied as the @var{queue} argument. 
+@end table + +It is unfortunate that this function is named @code{tcflush}, because +the term ``flush'' is normally used for quite another operation---waiting +until all output is transmitted---and using it for discarding input or +output would be confusing. Unfortunately, the name @code{tcflush} comes +from POSIX and we cannot change it. +@end deftypefun + +@cindex flow control, terminal +@cindex terminal flow control +@comment termios.h +@comment POSIX.1 +@deftypefun int tcflow (int @var{filedes}, int @var{action}) +@safety{@prelim{}@mtunsafe{@mtasurace{:tcattr(filedes)/bsd}}@asunsafe{}@acsafe{}} +@c Direct ioctl on Linux. On BSD, the TCO* actions are a single ioctl, +@c whereas the TCI actions first call tcgetattr and then write to the fd +@c the c_cc character corresponding to the action; there's a window for +@c another thread to change the xon/xoff characters. +The @code{tcflow} function is used to perform operations relating to +XON/XOFF flow control on the terminal file specified by @var{filedes}. + +The @var{action} argument specifies what operation to perform, and can +be one of the following values: + +@vtable @code +@item TCOOFF +Suspend transmission of output. + +@item TCOON +Restart transmission of output. + +@item TCIOFF +Transmit a STOP character. + +@item TCION +Transmit a START character. +@end vtable + +For more information about the STOP and START characters, see @ref{Special +Characters}. + +The return value is normally zero. In the event of an error, a value +of @math{-1} is returned. The following @code{errno} error conditions +are defined for this function: + +@table @code +@vindex EBADF +@item EBADF +The @var{filedes} is not a valid file descriptor. + +@vindex ENOTTY +@item ENOTTY +The @var{filedes} is not associated with a terminal device. + +@vindex EINVAL +@item EINVAL +A bad value was supplied as the @var{action} argument. 
+@end table +@end deftypefun + +@node Noncanon Example +@section Noncanonical Mode Example + +Here is an example program that shows how you can set up a terminal +device to read single characters in noncanonical input mode, without +echo. + +@smallexample +@include termios.c.texi +@end smallexample + +This program is careful to restore the original terminal modes before +exiting or terminating with a signal. It uses the @code{atexit} +function (@pxref{Cleanups on Exit}) to make sure this is done +by @code{exit}. + +@ignore +@c !!!! the example doesn't handle any signals! +The signals handled in the example are the ones that typically occur due +to actions of the user. It might be desirable to handle other signals +such as SIGSEGV that can result from bugs in the program. +@end ignore + +The shell is supposed to take care of resetting the terminal modes when +a process is stopped or continued; see @ref{Job Control}. But some +existing shells do not actually do this, so you may wish to establish +handlers for job control signals that reset terminal modes. The above +example does so. + + +@node Pseudo-Terminals +@section Pseudo-Terminals +@cindex pseudo-terminals + +A @dfn{pseudo-terminal} is a special interprocess communication channel +that acts like a terminal. One end of the channel is called the +@dfn{master} side or @dfn{master pseudo-terminal device}, the other side +is called the @dfn{slave} side. Data written to the master side is +received by the slave side as if it was the result of a user typing at +an ordinary terminal, and data written to the slave side is sent to the +master side as if it was written on an ordinary terminal. + +Pseudo terminals are the way programs like @code{xterm} and @code{emacs} +implement their terminal emulation functionality. + +@menu +* Allocation:: Allocating a pseudo terminal. +* Pseudo-Terminal Pairs:: How to open both sides of a + pseudo-terminal in a single operation. 
+@end menu + +@node Allocation +@subsection Allocating Pseudo-Terminals +@cindex allocating pseudo-terminals + +@pindex stdlib.h +This subsection describes functions for allocating a pseudo-terminal, +and for making this pseudo-terminal available for actual use. These +functions are declared in the header file @file{stdlib.h}. + +@comment stdlib.h +@comment GNU +@deftypefun int getpt (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}} +@c On BSD, tries to open multiple potential pty names, returning on the +@c first success. On Linux, try posix_openpt first, then fallback to +@c the BSD implementation. The posix implementation opens the ptmx +@c device, checks with statfs that /dev/pts is a devpts or that /dev is +@c a devfs, and returns the fd; static variables devpts_mounted and +@c have_no_dev_ptmx are safely initialized so as to avoid repeated +@c tests. +The @code{getpt} function returns a new file descriptor for the next +available master pseudo-terminal. The normal return value from +@code{getpt} is a non-negative integer file descriptor. In the case of +an error, a value of @math{-1} is returned instead. The following +@code{errno} conditions are defined for this function: + +@table @code +@item ENOENT +There are no free master pseudo-terminals available. +@end table + +This function is a GNU extension. +@end deftypefun + +@comment stdlib.h +@comment SVID, XPG4.2 +@deftypefun int grantpt (int @var{filedes}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c grantpt @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c unix/grantpt:pts_name @acsuheap @acsmem +@c ptsname_internal dup ok (but this is Linux-only!) 
+@c memchr dup ok +@c realloc dup @acsuheap @acsmem +@c malloc dup @acsuheap @acsmem +@c free dup @acsuheap @acsmem +@c fcntl dup ok +@c getuid dup ok +@c chown dup ok +@c sysconf(_SC_GETGR_R_SIZE_MAX) ok +@c getgrnam_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getgid dup ok +@c chmod dup ok +@c fork dup @aculock +@c [child] +@c setrlimit +@c dup2 +@c CLOSE_ALL_FDS +@c execle +@c _exit +@c waitpid dup ok +@c WIFEXITED dup ok +@c WEXITSTATUS dup ok +@c free dup @ascuheap @acsmem +The @code{grantpt} function changes the ownership and access permission +of the slave pseudo-terminal device corresponding to the master +pseudo-terminal device associated with the file descriptor +@var{filedes}. The owner is set from the real user ID of the calling +process (@pxref{Process Persona}), and the group is set to a special +group (typically @dfn{tty}) or from the real group ID of the calling +process. The access permission is set such that the file is both +readable and writable by the owner and only writable by the group. + +On some systems this function is implemented by invoking a special +@code{setuid} root program (@pxref{How Change Persona}). As a +consequence, installing a signal handler for the @code{SIGCHLD} signal +(@pxref{Job Control Signals}) may interfere with a call to +@code{grantpt}. + +The normal return value from @code{grantpt} is @math{0}; a value of +@math{-1} is returned in case of failure. The following @code{errno} +error conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINVAL +The @var{filedes} argument is not associated with a master pseudo-terminal +device. + +@item EACCES +The slave pseudo-terminal device corresponding to the master associated +with @var{filedes} could not be accessed. 
+@end table + +@end deftypefun + +@comment stdlib.h +@comment SVID, XPG4.2 +@deftypefun int unlockpt (int @var{filedes}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{/bsd}}@acunsafe{@acsmem{} @acsfd{}}} +@c unlockpt @ascuheap/bsd @acsmem @acsfd +@c /bsd +@c ptsname_r dup @ascuheap @acsmem @acsfd +@c revoke ok (syscall) +@c /linux +@c ioctl dup ok +The @code{unlockpt} function unlocks the slave pseudo-terminal device +corresponding to the master pseudo-terminal device associated with the +file descriptor @var{filedes}. On many systems, the slave can only be +opened after unlocking, so portable applications should always call +@code{unlockpt} before trying to open the slave. + +The normal return value from @code{unlockpt} is @math{0}; a value of +@math{-1} is returned in case of failure. The following @code{errno} +error conditions are defined for this function: + +@table @code +@item EBADF +The @var{filedes} argument is not a valid file descriptor. + +@item EINVAL +The @var{filedes} argument is not associated with a master pseudo-terminal +device. +@end table +@end deftypefun + +@comment stdlib.h +@comment SVID, XPG4.2 +@deftypefun {char *} ptsname (int @var{filedes}) +@safety{@prelim{}@mtunsafe{@mtasurace{:ptsname}}@asunsafe{@ascuheap{/bsd}}@acunsafe{@acsmem{} @acsfd{}}} +@c ptsname @mtasurace:ptsname @ascuheap/bsd @acsmem @acsfd +@c ptsname_r dup @ascuheap/bsd @acsmem @acsfd +If the file descriptor @var{filedes} is associated with a +master pseudo-terminal device, the @code{ptsname} function returns a +pointer to a statically-allocated, null-terminated string containing the +file name of the associated slave pseudo-terminal file. This string +might be overwritten by subsequent calls to @code{ptsname}. 
+@end deftypefun + +@comment stdlib.h +@comment GNU +@deftypefun int ptsname_r (int @var{filedes}, char *@var{buf}, size_t @var{len}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{/bsd}}@acunsafe{@acsmem{} @acsfd{}}} +@c ptsname_r @ascuheap/bsd @acsmem @acsfd +@c /hurd +@c term_get_peername ok +@c strlen dup ok +@c memcpy dup ok +@c /bsd +@c isatty dup ok +@c strlen dup ok +@c ttyname_r dup @ascuheap @acsmem @acsfd +@c stat dup ok +@c /linux +@c ptsname_internal ok +@c isatty dup ok +@c ioctl dup ok +@c strlen dup ok +@c itoa_word dup ok +@c stpcpy dup ok +@c memcpy dup ok +@c fxstat64 dup ok +@c MASTER_P ok +@c major ok +@c gnu_dev_major ok +@c minor ok +@c gnu_dev_minor ok +@c minor dup ok +@c xstat64 dup ok +@c S_ISCHR dup ok +@c SLAVE_P ok +@c major dup ok +@c minor dup ok +The @code{ptsname_r} function is similar to the @code{ptsname} function +except that it places its result into the user-specified buffer starting +at @var{buf} with length @var{len}. + +This function is a GNU extension. +@end deftypefun + +@strong{Portability Note:} On @w{System V} derived systems, the file +returned by the @code{ptsname} and @code{ptsname_r} functions may be +STREAMS-based, and therefore require additional processing after opening +before it actually behaves as a pseudo terminal. 
+@c FIXME: xref STREAMS + +Typical usage of these functions is illustrated by the following example: +@smallexample +int +open_pty_pair (int *amaster, int *aslave) +@{ + int master, slave; + char *name; + + master = getpt (); + if (master < 0) + return 0; + + if (grantpt (master) < 0 || unlockpt (master) < 0) + goto close_master; + name = ptsname (master); + if (name == NULL) + goto close_master; + + slave = open (name, O_RDWR); + if (slave == -1) + goto close_master; + + if (isastream (slave)) + @{ + if (ioctl (slave, I_PUSH, "ptem") < 0 + || ioctl (slave, I_PUSH, "ldterm") < 0) + goto close_slave; + @} + + *amaster = master; + *aslave = slave; + return 1; + +close_slave: + close (slave); + +close_master: + close (master); + return 0; +@} +@end smallexample + +@node Pseudo-Terminal Pairs +@subsection Opening a Pseudo-Terminal Pair +@cindex opening a pseudo-terminal pair + +These functions, derived from BSD, are available in the separate +@file{libutil} library, and declared in @file{pty.h}. 
+ +@comment pty.h +@comment BSD +@deftypefun int openpty (int *@var{amaster}, int *@var{aslave}, char *@var{name}, const struct termios *@var{termp}, const struct winsize *@var{winp}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c openpty @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getpt @acsfd +@c grantpt @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c unlockpt dup @ascuheap/bsd @acsmem @acsfd +@c openpty:pts_name @acsuheap @acsmem @acsfd +@c ptsname_r dup @ascuheap/bsd @acsmem @acsfd +@c realloc dup @acsuheap @acsmem +@c malloc dup @acsuheap @acsmem +@c free dup @acsuheap @acsmem +@c open dup @acsfd +@c free dup @acsuheap @acsmem +@c tcsetattr dup ok +@c ioctl dup ok +@c strcpy dup ok +@c close dup @acsfd +This function allocates and opens a pseudo-terminal pair, returning the +file descriptor for the master in @var{*amaster}, and the file +descriptor for the slave in @var{*aslave}. If the argument @var{name} +is not a null pointer, the file name of the slave pseudo-terminal +device is stored in @code{*name}. If @var{termp} is not a null pointer, +the terminal attributes of the slave are set to the ones specified in +the structure that @var{termp} points to (@pxref{Terminal Modes}). +Likewise, if @var{winp} is not a null pointer, the screen size of +the slave is set to the values specified in the structure that +@var{winp} points to. + +The normal return value from @code{openpty} is @math{0}; a value of +@math{-1} is returned in case of failure. The following @code{errno} +conditions are defined for this function: + +@table @code +@item ENOENT +There are no free pseudo-terminal pairs available. 
+@end table + +@strong{Warning:} Using the @code{openpty} function with @var{name} not +set to @code{NULL} is @strong{very dangerous} because it provides no +protection against overflowing the string @var{name}. You should use +the @code{ttyname} function on the file descriptor returned in +@var{*aslave} to find out the file name of the slave pseudo-terminal +device instead. +@end deftypefun + +@comment pty.h +@comment BSD +@deftypefun int forkpty (int *@var{amaster}, char *@var{name}, const struct termios *@var{termp}, const struct winsize *@var{winp}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c forkpty @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c openpty dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c fork dup @aculock +@c close dup @acsfd +@c /child +@c close dup @acsfd +@c login_tty dup @mtasurace:ttyname @ascuheap @asulock @aculock @acsmem @acsfd +@c _exit dup ok +@c close dup @acsfd +This function is similar to the @code{openpty} function, but in +addition, forks a new process (@pxref{Creating a Process}) and makes the +newly opened slave pseudo-terminal device the controlling terminal +(@pxref{Controlling Terminal}) for the child process. + +If the operation is successful, there are then both parent and child +processes and both see @code{forkpty} return, but with different values: +it returns a value of @math{0} in the child process and returns the child's +process ID in the parent process. + +If the allocation of a pseudo-terminal pair or the process creation +failed, @code{forkpty} returns a value of @math{-1} in the parent +process. + +@strong{Warning:} The @code{forkpty} function has the same problems with +respect to the @var{name} argument as @code{openpty}.
+@end deftypefun diff --git a/REORG.TODO/manual/texinfo.tex b/REORG.TODO/manual/texinfo.tex new file mode 100644 index 0000000000..6d45464eb2 --- /dev/null +++ b/REORG.TODO/manual/texinfo.tex @@ -0,0 +1,11676 @@ +% texinfo.tex -- TeX macros to handle Texinfo files. +% +% Load plain if necessary, i.e., if running under initex. +\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi +% +\def\texinfoversion{2016-09-18.18} +% +% Copyright 1985, 1986, 1988, 1990, 1991, 1992, 1993, 1994, 1995, +% 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +% 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 +% Free Software Foundation, Inc. +% +% This texinfo.tex file is free software: you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This texinfo.tex file is distributed in the hope that it will be +% useful, but WITHOUT ANY WARRANTY; without even the implied warranty +% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see <http://www.gnu.org/licenses/>. +% +% As a special exception, when this file is read by TeX when processing +% a Texinfo source document, you may use the result without +% restriction. This Exception is an additional permission under section 7 +% of the GNU General Public License, version 3 ("GPLv3"). 
+% +% Please try the latest version of texinfo.tex before submitting bug +% reports; you can get the latest version from: +% http://ftp.gnu.org/gnu/texinfo/ (the Texinfo release area), or +% http://ftpmirror.gnu.org/texinfo/ (same, via a mirror), or +% http://www.gnu.org/software/texinfo/ (the Texinfo home page) +% The texinfo.tex in any given distribution could well be out +% of date, so if that's what you're using, please check. +% +% Send bug reports to bug-texinfo@gnu.org. Please include a +% complete document in each bug report with which we can reproduce the +% problem. Patches are, of course, greatly appreciated. +% +% To process a Texinfo manual with TeX, it's most reliable to use the +% texi2dvi shell script that comes with the distribution. For a simple +% manual foo.texi, however, you can get away with this: +% tex foo.texi +% texindex foo.?? +% tex foo.texi +% tex foo.texi +% dvips foo.dvi -o # or whatever; this makes foo.ps. +% The extra TeX runs get the cross-reference information correct. +% Sometimes one run after texindex suffices, and sometimes you need more +% than two; texi2dvi does it as many times as necessary. +% +% It is possible to adapt texinfo.tex for other languages, to some +% extent. You can get the existing language-specific files from the +% full Texinfo distribution. +% +% The GNU Texinfo home page is http://www.gnu.org/software/texinfo. + + +\message{Loading texinfo [version \texinfoversion]:} + +% If in a .fmt file, print the version number +% and turn on active characters that we couldn't do earlier because +% they might have appeared in the input file name. +\everyjob{\message{[Texinfo version \texinfoversion]}% + \catcode`+=\active \catcode`\_=\active} + +% LaTeX's \typeout. This ensures that the messages it is used for +% are identical in format to the corresponding ones from latex/pdflatex. +\def\typeout{\immediate\write17}% + +\chardef\other=12 + +% We never want plain's \outer definition of \+ in Texinfo.
+% For @tex, we can use \tabalign. +\let\+ = \relax + +% Save some plain tex macros whose names we will redefine. +\let\ptexb=\b +\let\ptexbullet=\bullet +\let\ptexc=\c +\let\ptexcomma=\, +\let\ptexdot=\. +\let\ptexdots=\dots +\let\ptexend=\end +\let\ptexequiv=\equiv +\let\ptexexclam=\! +\let\ptexfootnote=\footnote +\let\ptexgtr=> +\let\ptexhat=^ +\let\ptexi=\i +\let\ptexindent=\indent +\let\ptexinsert=\insert +\let\ptexlbrace=\{ +\let\ptexless=< +\let\ptexnewwrite\newwrite +\let\ptexnoindent=\noindent +\let\ptexplus=+ +\let\ptexraggedright=\raggedright +\let\ptexrbrace=\} +\let\ptexslash=\/ +\let\ptexsp=\sp +\let\ptexstar=\* +\let\ptexsup=\sup +\let\ptext=\t +\let\ptextop=\top +{\catcode`\'=\active \global\let\ptexquoteright'}% active in plain's math mode + +% If this character appears in an error message or help string, it +% starts a new line in the output. +\newlinechar = `^^J + +% Use TeX 3.0's \inputlineno to get the line number, for better error +% messages, but if we're using an old version of TeX, don't do anything. +% \linenumber expands to `l.<line number>: ', or to nothing when \inputlineno is undefined. +\ifx\inputlineno\thisisundefined + \let\linenumber = \empty % Pre-3.0. +\else + \def\linenumber{l.\the\inputlineno:\space} +\fi + +% Set up fixed words for English if not already set.
+\ifx\putwordAppendix\undefined \gdef\putwordAppendix{Appendix}\fi +\ifx\putwordChapter\undefined \gdef\putwordChapter{Chapter}\fi +\ifx\putworderror\undefined \gdef\putworderror{error}\fi +\ifx\putwordfile\undefined \gdef\putwordfile{file}\fi +\ifx\putwordin\undefined \gdef\putwordin{in}\fi +\ifx\putwordIndexIsEmpty\undefined \gdef\putwordIndexIsEmpty{(Index is empty)}\fi +\ifx\putwordIndexNonexistent\undefined \gdef\putwordIndexNonexistent{(Index is nonexistent)}\fi +\ifx\putwordInfo\undefined \gdef\putwordInfo{Info}\fi +\ifx\putwordInstanceVariableof\undefined \gdef\putwordInstanceVariableof{Instance Variable of}\fi +\ifx\putwordMethodon\undefined \gdef\putwordMethodon{Method on}\fi +\ifx\putwordNoTitle\undefined \gdef\putwordNoTitle{No Title}\fi +\ifx\putwordof\undefined \gdef\putwordof{of}\fi +\ifx\putwordon\undefined \gdef\putwordon{on}\fi +\ifx\putwordpage\undefined \gdef\putwordpage{page}\fi +\ifx\putwordsection\undefined \gdef\putwordsection{section}\fi +\ifx\putwordSection\undefined \gdef\putwordSection{Section}\fi +\ifx\putwordsee\undefined \gdef\putwordsee{see}\fi +\ifx\putwordSee\undefined \gdef\putwordSee{See}\fi +\ifx\putwordShortTOC\undefined \gdef\putwordShortTOC{Short Contents}\fi +\ifx\putwordTOC\undefined \gdef\putwordTOC{Table of Contents}\fi +% Month names; like the words above, each is defined only if not already set. +\ifx\putwordMJan\undefined \gdef\putwordMJan{January}\fi +\ifx\putwordMFeb\undefined \gdef\putwordMFeb{February}\fi +\ifx\putwordMMar\undefined \gdef\putwordMMar{March}\fi +\ifx\putwordMApr\undefined \gdef\putwordMApr{April}\fi +\ifx\putwordMMay\undefined \gdef\putwordMMay{May}\fi +\ifx\putwordMJun\undefined \gdef\putwordMJun{June}\fi +\ifx\putwordMJul\undefined \gdef\putwordMJul{July}\fi +\ifx\putwordMAug\undefined \gdef\putwordMAug{August}\fi +\ifx\putwordMSep\undefined \gdef\putwordMSep{September}\fi +\ifx\putwordMOct\undefined \gdef\putwordMOct{October}\fi +\ifx\putwordMNov\undefined \gdef\putwordMNov{November}\fi +\ifx\putwordMDec\undefined \gdef\putwordMDec{December}\fi +%
+\ifx\putwordDefmac\undefined \gdef\putwordDefmac{Macro}\fi +\ifx\putwordDefspec\undefined \gdef\putwordDefspec{Special Form}\fi +\ifx\putwordDefvar\undefined \gdef\putwordDefvar{Variable}\fi +\ifx\putwordDefopt\undefined \gdef\putwordDefopt{User Option}\fi +\ifx\putwordDeffunc\undefined \gdef\putwordDeffunc{Function}\fi + +% Give the space character the catcode for a space. +\def\spaceisspace{\catcode`\ =10\relax} + +% Character codes of the `-', `/' and `_' characters. +\chardef\dashChar = `\- +\chardef\slashChar = `\/ +\chardef\underChar = `\_ + +% Ignore a token. +% \gobble takes one argument and expands to nothing. +\def\gobble#1{} + +% The following is used inside several \edef's. +\def\makecsname#1{\expandafter\noexpand\csname#1\endcsname} + +% Hyphenation fixes. +\hyphenation{ + Flor-i-da Ghost-script Ghost-view Mac-OS Post-Script + ap-pen-dix bit-map bit-maps + data-base data-bases eshell fall-ing half-way long-est man-u-script + man-u-scripts mini-buf-fer mini-buf-fers over-view par-a-digm + par-a-digms rath-er rec-tan-gu-lar ro-bot-ics se-vere-ly set-up spa-ces + spell-ing spell-ings + stand-alone strong-est time-stamp time-stamps which-ever white-space + wide-spread wrap-around +} + +% Sometimes it is convenient to have everything in the transcript file +% and nothing on the terminal. We don't just call \tracingall here, +% since that produces some useless output on the terminal. We also make +% some effort to order the tracing commands to reduce output in the log +% file; cf. trace.sty in LaTeX. +% \gloggingall runs \loggingall inside a group with \globaldefs = 1. +\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% +\def\loggingall{% + \tracingstats2 + \tracingpages1 + \tracinglostchars2 % 2 gives us more in etex + \tracingparagraphs1 + \tracingoutput1 + \tracingmacros2 + \tracingrestores1 + \showboxbreadth\maxdimen \showboxdepth\maxdimen + \ifx\eTeXversion\thisisundefined\else % etex gives us more logging + \tracingscantokens1 + \tracingifs1 + \tracinggroups1 + \tracingnesting2 + \tracingassigns1 + \fi + \tracingcommands3 % 3 gives us more in etex + \errorcontextlines16 +}% + +% @errormsg{MSG}.
Do the index-like expansions on MSG, but if things +% aren't perfect, it's not the end of the world, being an error message, +% after all. +% NOTE(review): \errormsg opens a group that \doerrormsg does not close here; confirm whether an \endgroup is missing. +\def\errormsg{\begingroup \indexnofonts \doerrormsg} +\def\doerrormsg#1{\errmessage{#1}} + +% add check for \lastpenalty to plain's definitions. If the last thing +% we did was a \nobreak, we don't want to insert more space. +% +\def\smallbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\smallskipamount + \removelastskip\penalty-50\smallskip\fi\fi} +\def\medbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\medskipamount + \removelastskip\penalty-100\medskip\fi\fi} +\def\bigbreak{\ifnum\lastpenalty<10000\par\ifdim\lastskip<\bigskipamount + \removelastskip\penalty-200\bigskip\fi\fi} + +% Output routine +% + +% For a final copy, take out the rectangles +% that mark overfull boxes (in case you have decided +% that the text looks ok even though it passes the margin). +% +\def\finalout{\overfullrule=0pt } + +% Do @cropmarks to get crop marks. +% +\newif\ifcropmarks +\let\cropmarks = \cropmarkstrue +% +% Dimensions to add cropmarks at corners. +% Added by P. A. MacKay, 12 Nov. 1986 +% +\newdimen\outerhsize \newdimen\outervsize % set by the paper size routines +\newdimen\cornerlong \cornerlong=1pc +\newdimen\cornerthick \cornerthick=.3pt +\newdimen\topandbottommargin \topandbottommargin=.75in + +% Output a mark which sets \thischapter, \thissection and \thiscolor. +% We dump everything together because we only have one kind of mark. +% This works because we only use \botmark / \topmark, not \firstmark. +% NOTE(review): \gettopheadingmarks does fall back to \firstmark when \thischapter is empty; the sentence above looks out of date. +% A mark contains a subexpression of the \ifcase ... \fi construct. +% \get*marks macros below extract the needed part using \ifcase. +% +% Another complication is to let the user choose whether \thischapter +% (\thissection) refers to the chapter (section) in effect at the top +% of a page, or that at the bottom of a page.
+
+% \domark is called twice inside \chapmacro, to add one
+% mark before the section break, and one after.
+% In the second call \prevchapterdefs is the same as \lastchapterdefs,
+% and \prevsectiondefs is the same as \lastsectiondefs.
+% Then if the page is not broken at the mark, some of the previous
+% section appears on the page, and we can get the name of this section
+% from \firstmark for @everyheadingmarks top.
+% @everyheadingmarks bottom uses \botmark.
+%
+% See page 260 of The TeXbook.
+%
+% Each definition list is first expanded once into a token register:
+% \mark expands its argument, and \the\toks<n> delivers the register
+% contents without expanding them any further.  The \noexpand'ed \or
+% and \else survive into the mark and separate the three branches that
+% the \get*marks macros select with \ifcase.
+\def\domark{%
+  \toks0=\expandafter{\lastchapterdefs}%
+  \toks2=\expandafter{\lastsectiondefs}%
+  \toks4=\expandafter{\prevchapterdefs}%
+  \toks6=\expandafter{\prevsectiondefs}%
+  \toks8=\expandafter{\lastcolordefs}%
+  \mark{%
+                   \the\toks0 \the\toks2  % 0: marks for @everyheadingmarks top
+      \noexpand\or \the\toks4 \the\toks6  % 1: for @everyheadingmarks bottom
+    \noexpand\else \the\toks8             % 2: color marks
+  }%
+}
+
+% \gettopheadingmarks, \getbottomheadingmarks,
+% \getcolormarks - extract needed part of mark.
+%
+% \topmark doesn't work for the very first chapter (after the title
+% page or the contents), so we use \firstmark there -- this gets us
+% the mark with the chapter defs, unless the user sneaks in, e.g.,
+% @setcolor (or @url, or @link, etc.) between @contents and the very
+% first @chapter.
+\def\gettopheadingmarks{%
+  \ifcase0\topmark\fi
+  \ifx\thischapter\empty \ifcase0\firstmark\fi \fi
+}
+\def\getbottomheadingmarks{\ifcase1\botmark\fi}
+\def\getcolormarks{\ifcase2\topmark\fi}
+
+% Avoid "undefined control sequence" errors.
+\def\lastchapterdefs{}
+\def\lastsectiondefs{}
+\def\lastsection{}
+\def\prevchapterdefs{}
+\def\prevsectiondefs{}
+\def\lastcolordefs{}
+
+% Margin to add to right of even pages, to left of odd pages.
+\newdimen\bindingoffset
+\newdimen\normaloffset
+\newdimen\txipagewidth \newdimen\txipageheight
+
+% Main output routine.
+%
+% \PAGE names box 255; the output routine hands \pagecontents\PAGE to
+% \onepageout as the page body.
+\chardef\PAGE = 255
+\output = {\onepageout{\pagecontents\PAGE}}
+
+\newbox\headlinebox
+\newbox\footlinebox
+
+% \onepageout takes a vbox as an argument.
+% \shipout a vbox for a single page, adding an optional header, footer,
+% cropmarks, and footnote.  This also causes index entries for this page
+% to be written to the auxiliary files.
+%
+% #1 is the material for the page body; it is passed on to \pagebody.
+%
+\def\onepageout#1{%
+  \ifcropmarks \hoffset=0pt \else \hoffset=\normaloffset \fi
+  %
+  \ifodd\pageno  \advance\hoffset by \bindingoffset
+  \else \advance\hoffset by -\bindingoffset\fi
+  %
+  % Common context changes for both heading and footing.
+  % Do this outside of the \shipout so @code etc. will be expanded in
+  % the headline as they should be, not taken literally (outputting ''code).
+  % (The name is spelled with three m's here and at both uses below.)
+  \def\commmonheadfootline{\let\hsize=\txipagewidth \texinfochars}
+  %
+  % Retrieve the information for the headings from the marks in the page,
+  % and call Plain TeX's \makeheadline and \makefootline, which use the
+  % values in \headline and \footline.
+  %
+  % This is used to check if we are on the first page of a chapter.
+  \ifcase1\topmark\fi
+  \let\prevchaptername\thischaptername
+  \ifcase0\firstmark\fi
+  \let\curchaptername\thischaptername
+  %
+  \ifodd\pageno \getoddheadingmarks \else \getevenheadingmarks \fi
+  \ifodd\pageno \getoddfootingmarks \else \getevenfootingmarks \fi
+  %
+  \ifx\curchaptername\prevchaptername
+    \let\thischapterheading\thischapter
+  \else
+    % \thischapterheading is the same as \thischapter except it is blank
+    % for the first page of a chapter.  This is to prevent the chapter name
+    % being shown twice.
+    \def\thischapterheading{}%
+  \fi
+  %
+  \global\setbox\headlinebox = \vbox{\commmonheadfootline \makeheadline}%
+  \global\setbox\footlinebox = \vbox{\commmonheadfootline \makefootline}%
+  %
+  {%
+    % Set context for writing to auxiliary files like index files.
+    % Have to do this stuff outside the \shipout because we want it to
+    % take effect in \write's, yet the group defined by the \vbox ends
+    % before the \shipout runs.
+    %
+    \indexdummies         % don't expand commands in the output.
+    \normalturnoffactive  % \ in index entries must not stay \, e.g., if
+               % the page break happens to be in the middle of an example.
+               % We don't want .vr (or whatever) entries like this:
+               % \entry{{\indexbackslash }acronym}{32}{\code {\acronym}}
+               % "\acronym" won't work when it's read back in;
+               % it needs to be
+               % {\code {{\backslashcurfont }acronym}
+    \shipout\vbox{%
+      % Do this early so pdf references go to the beginning of the page.
+      \ifpdfmakepagedest \pdfdest name{\the\pageno} xyz\fi
+      %
+      \ifcropmarks \vbox to \outervsize\bgroup
+        \hsize = \outerhsize
+        \vskip-\topandbottommargin
+        \vtop to0pt{%
+          \line{\ewtop\hfil\ewtop}%
+          \nointerlineskip
+          \line{%
+            \vbox{\moveleft\cornerthick\nstop}%
+            \hfill
+            \vbox{\moveright\cornerthick\nstop}%
+          }%
+          \vss}%
+        \vskip\topandbottommargin
+        \line\bgroup
+          \hfil % center the page within the outer (page) hsize.
+          \ifodd\pageno\hskip\bindingoffset\fi
+          \vbox\bgroup
+      \fi
+      %
+      \unvbox\headlinebox
+      \pagebody{#1}%
+      \ifdim\ht\footlinebox > 0pt
+        % Only leave this space if the footline is nonempty.
+        % (We lessened \vsize for it in \oddfootingyyy.)
+        % The \baselineskip=24pt in plain's \makefootline has no effect.
+        \vskip 24pt
+        \unvbox\footlinebox
+      \fi
+      %
+      \ifcropmarks
+          \egroup % end of \vbox\bgroup
+        \hfil\egroup % end of (centering) \line\bgroup
+        \vskip\topandbottommargin plus1fill minus1fill
+        \boxmaxdepth = \cornerthick
+        \vbox to0pt{\vss
+          \line{%
+            \vbox{\moveleft\cornerthick\nsbot}%
+            \hfill
+            \vbox{\moveright\cornerthick\nsbot}%
+          }%
+          \nointerlineskip
+          \line{\ewbot\hfil\ewbot}%
+        }%
+      \egroup % \vbox from first cropmarks clause
+      \fi
+    }% end of \shipout\vbox
+  }% end of group with \indexdummies
+  \advancepageno
+  \ifnum\outputpenalty>-20000 \else\dosupereject\fi
+}
+
+\newinsert\margin \dimen\margin=\maxdimen
+
+% Main part of page, including any footnotes
+\def\pagebody#1{\vbox to\txipageheight{\boxmaxdepth=\maxdepth #1}}
+%
+% \pagecontents#1 (#1 is a box number): unbox the \topins insertions,
+% put any \margin insertion to the right of the text, unbox #1, and
+% append the \footins material below a \footnoterule.  When
+% \ifr@ggedbottom is true, the depth of #1 is cancelled with a kern
+% and \vfil is added.
+{\catcode`\@ =11
+\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi
+% marginal hacks, juha@viisa.uucp (Juha Takala)
+\ifvoid\margin\else % marginal info is present
+  \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi
+\dimen@=\dp#1\relax \unvbox#1\relax
+\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi
+\ifr@ggedbottom \kern-\dimen@ \vfil \fi}
+}
+
+% Here are the rules for the cropmarks.  Note that they are
+% offset so that the space between them is truly \outerhsize or \outervsize
+% (P. A. MacKay, 12 November, 1986)
+%
+\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong}
+\def\nstop{\vbox
+  {\hrule height\cornerthick depth\cornerlong width\cornerthick}}
+\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong}
+\def\nsbot{\vbox
+  {\hrule height\cornerlong depth\cornerthick width\cornerthick}}
+
+
+% Argument parsing
+
+% Parse an argument, then pass it to #1.  The argument is the rest of
+% the input line (except we remove a trailing comment).  #1 should be a
+% macro which expects an ordinary undelimited TeX argument.
+% For example, \def\foo{\parsearg\fooxxx}.
+%
+% \parseargusing{SETUP}{MACRO} is the general form: SETUP is executed
+% inside the group, after \obeylines and \spaceisspace and before the
+% line is read (\include uses it to change catcodes); MACRO is saved in
+% \argtorun and receives the finished argument.
+\def\parsearg{\parseargusing{}}
+\def\parseargusing#1#2{%
+  \def\argtorun{#2}%
+  \begingroup
+    \obeylines
+    \spaceisspace
+    #1%
+    \parseargline\empty% Insert the \empty token, see \finishparsearg below.
+}
+
+{\obeylines %
+  \gdef\parseargline#1^^M{%
+    \endgroup % End of the group started in \parseargusing.
+    \argremovecomment #1\comment\ArgTerm%
+  }%
+}
+
+% First remove any @comment, then any @c comment.  Also remove a @texinfoc
+% comment (see \scanmacro for details).  Pass the result on to \argcheckspaces.
+\def\argremovecomment#1\comment#2\ArgTerm{\argremovec #1\c\ArgTerm}
+\def\argremovec#1\c#2\ArgTerm{\argremovetexinfoc #1\texinfoc\ArgTerm}
+\def\argremovetexinfoc#1\texinfoc#2\ArgTerm{\argcheckspaces#1\^^M\ArgTerm}
+
+% Each occurrence of `\^^M' or `<space>\^^M' is replaced by a single space.
+%
+% \argremovec might leave us with trailing space, e.g.,
+%    @end itemize  @c foo
+% This space token undergoes the same procedure and is eventually removed
+% by \finishparsearg.
+%
+\def\argcheckspaces#1\^^M{\argcheckspacesX#1\^^M \^^M}
+\def\argcheckspacesX#1 \^^M{\argcheckspacesY#1\^^M}
+\def\argcheckspacesY#1\^^M#2\^^M#3\ArgTerm{%
+  \def\temp{#3}%
+  \ifx\temp\empty
+    % Do not use \next, perhaps the caller of \parsearg uses it; reuse \temp:
+    \let\temp\finishparsearg
+  \else
+    \let\temp\argcheckspaces
+  \fi
+  % Put the space token in:
+  \temp#1 #3\ArgTerm
+}
+
+% If a _delimited_ argument is enclosed in braces, they get stripped; so
+% to get _exactly_ the rest of the line, we had to prevent such situation.
+% We prepended an \empty token at the very beginning and we expand it now,
+% just before passing the control to \argtorun.
+% (Similarly, we have to think about #3 of \argcheckspacesY above: it is
+% either the null string, or it ends with \^^M---thus there is no danger
+% that a pair of braces would be stripped.)
+%
+% But first, we have to remove the trailing space token.
+%
+% The single \expandafter chain expands the leading \empty (inserted by
+% \parseargusing) once, so \argtorun receives the line itself in braces.
+\def\finishparsearg#1 \ArgTerm{\expandafter\argtorun\expandafter{#1}}
+
+
+% \parseargdef - define a command taking an argument on the line
+%
+% \parseargdef\foo{...}
+%	is roughly equivalent to
+% \def\foo{\parsearg\Xfoo}
+% \def\Xfoo#1{...}
+%
+% The internal macro is named with \string of the command, i.e. the
+% name including its escape character.
+\def\parseargdef#1{%
+  \expandafter \doparseargdef \csname\string#1\endcsname #1%
+}
+% #1 is the internal macro, #2 the user-level command.  The trailing
+% \def#1##1 is deliberately left without a body: it picks up the
+% {...} that follows \parseargdef\foo in the source.
+\def\doparseargdef#1#2{%
+  \def#2{\parsearg#1}%
+  \def#1##1%
+}
+
+% Several utility definitions with active space:
+{
+  \obeyspaces
+  \gdef\obeyedspace{ }
+
+  % Make each space character in the input produce a normal interword
+  % space in the output.  Don't allow a line break at this space, as this
+  % is used only in environments like @example, where each line of input
+  % should produce a line of output anyway.
+  %
+  \gdef\sepspaces{\obeyspaces\let =\tie}
+
+  % If an index command is used in an @example environment, any spaces
+  % therein should become regular spaces in the raw index file, not the
+  % expansion of \tie (\leavevmode \penalty \@M \ ).
+  \gdef\unsepspaces{\let =\space}
+}
+
+
+% \flushcr: while \par is \lisppar, swallow the single argument that
+% follows; otherwise do nothing.
+\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next}
+
+% Define the framework for environments in texinfo.tex.  It's used like this:
+%
+%   \envdef\foo{...}
+%   \def\Efoo{...}
+%
+% It's the responsibility of \envdef to insert \begingroup before the
+% actual body; @end closes the group after calling \Efoo.  \envdef also
+% defines \thisenv, so the current environment is known; @end checks
+% whether the environment name matches.  The \checkenv macro can also be
+% used to check whether the current environment is the one expected.
+%
+% Non-false conditionals (@iftex, @ifset) don't fit into this, so they
+% are not treated as environments; they don't open a group.  (The
+% implementation of @end takes care not to call \endgroup in this
+% special case.)
+
+
+% At run-time, environments start with this:
+\def\startenvironment#1{\begingroup\def\thisenv{#1}}
+% initialize
+\let\thisenv\empty
+
+% ...
but they get defined via ``\envdef\foo{...}'':
+% \envdef\foo{BODY} defines \foo to call \startenvironment\foo and then
+% run BODY; \envparseargdef does the same for a command that takes the
+% rest of the line as its argument.
+\long\def\envdef#1#2{\def#1{\startenvironment#1#2}}
+\def\envparseargdef#1#2{\parseargdef#1{\startenvironment#1#2}}
+
+% Check whether we're in the right environment:
+\def\checkenv#1{%
+  \def\temp{#1}%
+  \ifx\thisenv\temp
+  \else
+    \badenverr
+  \fi
+}
+
+% Environment mismatch; the expected environment is the one \checkenv
+% stored in \temp:
+\def\badenverr{%
+  \errhelp = \EMsimple
+  \errmessage{This command can appear only \inenvironment\temp,
+    not \inenvironment\thisenv}%
+}
+% #1 is a macro holding an environment command (or nothing); produce the
+% matching phrase for the error message.
+\def\inenvironment#1{%
+  \ifx#1\empty
+    outside of any environment%
+  \else
+    in environment \expandafter\string#1%
+  \fi
+}
+
+% @end foo executes the definition of \Efoo.
+% But first, it executes a specialized version of \checkenv
+%
+% Nothing at all is done (in particular no \endgroup) when
+% \csname iscond.foo\endcsname expands to 1.
+%
+\parseargdef\end{%
+  \if 1\csname iscond.#1\endcsname
+  \else
+    % The general wording of \badenverr may not be ideal.
+    \expandafter\checkenv\csname#1\endcsname
+    \csname E#1\endcsname
+    \endgroup
+  \fi
+}
+
+\newhelp\EMsimple{Press RETURN to continue.}
+
+
+% Be sure we're in horizontal mode when doing a tie, since we make space
+% equivalent to this in @example-like environments. Otherwise, a space
+% at the beginning of a line will start with \penalty -- and
+% since \penalty is valid in vertical mode, we'd end up putting the
+% penalty on the vertical list instead of in the new paragraph.
+{\catcode`@ = 11
+ % Avoid using \@M directly, because that causes trouble
+ % if the definition is written into an index file.
+ \global\let\tiepenalty = \@M
+ \gdef\tie{\leavevmode\penalty\tiepenalty\ }
+}
+
+% @: forces normal size whitespace following.
+\def\:{\spacefactor=1000 }
+
+% @* forces a line break.
+\def\*{\unskip\hfil\break\hbox{}\ignorespaces}
+
+% @/ allows a line break.
+\let\/=\allowbreak
+
+% @. is an end-of-sentence period.
+\def\.{.\spacefactor=\endofsentencespacefactor\space}
+
+% @! is an end-of-sentence bang.
+\def\!{!\spacefactor=\endofsentencespacefactor\space}
+
+% @? is an end-of-sentence query.
+\def\?{?\spacefactor=\endofsentencespacefactor\space}
+
+% @frenchspacing on|off  says whether to put extra space after punctuation.
+%
+\def\onword{on}
+\def\offword{off}
+%
+% Any argument other than `on' or `off' raises an error.
+\parseargdef\frenchspacing{%
+  \def\temp{#1}%
+  \ifx\temp\onword \plainfrenchspacing
+  \else\ifx\temp\offword \plainnonfrenchspacing
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @frenchspacing option `\temp', must be on|off}%
+  \fi\fi
+}
+
+% @w prevents a word break.  Without the \leavevmode, @w at the
+% beginning of a paragraph, when TeX is still in vertical mode, would
+% produce a whole line of output instead of starting the paragraph.
+\def\w#1{\leavevmode\hbox{#1}}
+
+% @group ... @end group forces ... to be all on one page, by enclosing
+% it in a TeX vbox.  We use \vtop instead of \vbox to construct the box
+% to keep its height that of a normal line.  According to the rules for
+% \topskip (p.114 of the TeXbook), the glue inserted is
+% max (\topskip - \ht (first item), 0).  If that height is large,
+% therefore, no glue is inserted, and the space between the headline and
+% the text is small, which looks bad.
+%
+% Another complication is that the group might be very large.  This can
+% cause the glue on the previous page to be unduly stretched, because it
+% does not have much material.  In this case, it's better to add an
+% explicit \vfill so that the extra space is at the bottom.  The
+% threshold for doing this is if the group is more than \vfilllimit
+% percent of a page (\vfilllimit can be changed inside of @tex).
+%
+\newbox\groupbox
+\def\vfilllimit{0.7}
+%
+\envdef\group{%
+  \ifnum\catcode`\^^M=\active \else
+    \errhelp = \groupinvalidhelp
+    \errmessage{@group invalid in context where filling is enabled}%
+  \fi
+  \startsavinginserts
+  %
+  \setbox\groupbox = \vtop\bgroup
+    % Do @comment since we are called inside an environment such as
+    % @example, where each end-of-line in the input causes an
+    % end-of-line in the output.  We don't want the end-of-line after
+    % the `@group' to put extra space in the output.  Since @group
+    % should appear on a line by itself (according to the Texinfo
+    % manual), we don't worry about eating any user text.
+    \comment
+}
+%
+% The \vtop produces a box with normal height and large depth; thus, TeX puts
+% \baselineskip glue before it, and (when the next line of text is done)
+% \lineskip glue after it.  Thus, space below is not quite equal to space
+% above.  But it's pretty close.
+\def\Egroup{%
+  % To get correct interline space between the last line of the group
+  % and the first line afterwards, we have to propagate \prevdepth.
+  \endgraf % Not \par, as it may have been set to \lisppar.
+  \global\dimen1 = \prevdepth
+  \egroup           % End the \vtop.
+  \addgroupbox
+  \prevdepth = \dimen1
+  \checkinserts
+}
+
+% \addgroupbox puts \groupbox on the current list.  As coded, \page is
+% issued first only when both tests hold: the box is taller than the
+% space left on the page, and the material already on the page
+% (\pagetotal) is below \vfilllimit times \txipageheight.
+\def\addgroupbox{
+  % \dimen0 is the vertical size of the group's box.
+  \dimen0 = \ht\groupbox  \advance\dimen0 by \dp\groupbox
+  % \dimen2 is how much space is left on the page (more or less).
+  \dimen2 = \txipageheight   \advance\dimen2 by -\pagetotal
+  % if the group doesn't fit on the current page, and it's a big big
+  % group, force a page break.
+  \ifdim \dimen0 > \dimen2
+    \ifdim \pagetotal < \vfilllimit\txipageheight
+      \page
+    \fi
+  \fi
+  \box\groupbox
+}
+
+%
+% TeX puts in an \escapechar (i.e., `@') at the beginning of the help
+% message, so this ends up printing `@group can only ...'.
+%
+\newhelp\groupinvalidhelp{%
+group can only be used in environments such as @example,^^J%
+where each line of input produces a line of output.}
+
+% @need space-in-mils
+% forces a page break if there is not space-in-mils remaining.
+
+\newdimen\mil  \mil=0.001in
+
+\parseargdef\need{%
+  % Ensure vertical mode, so we don't make a big box in the middle of a
+  % paragraph.
+  \par
+  %
+  % If the @need value is less than one line space, it's useless.
+  \dimen0 = #1\mil
+  \dimen2 = \ht\strutbox
+  \advance\dimen2 by \dp\strutbox
+  \ifdim\dimen0 > \dimen2
+    %
+    % Do a \strut just to make the height of this box be normal, so the
+    % normal leading is inserted relative to the preceding line.
+    % And a page break here is fine.
+    \vtop to #1\mil{\strut\vfil}%
+    %
+    % TeX does not even consider page breaks if a penalty added to the
+    % main vertical list is 10000 or more.  But in order to see if the
+    % empty box we just added fits on the page, we must make it consider
+    % page breaks.  On the other hand, we don't want to actually break the
+    % page after the empty box.  So we use a penalty of 9999.
+    %
+    % There is an extremely small chance that TeX will actually break the
+    % page at this \penalty, if there are no other feasible breakpoints in
+    % sight.  (If the user is using lots of big @group commands, which
+    % almost-but-not-quite fill up a page, TeX will have a hard time doing
+    % good page breaking, for example.)  However, I could not construct an
+    % example where a page broke at this \penalty; if it happens in a real
+    % document, then we can reconsider our strategy.
+    \penalty9999
+    %
+    % Back up by the size of the box, whether we did a page break or not.
+    \kern -#1\mil
+    %
+    % Do not allow a page break right after this kern.
+    \nobreak
+  \fi
+}
+
+% @br   forces paragraph break (and is undocumented).
+
+\let\br = \par
+
+% @page forces the start of a new page.
+%
+\def\page{\par\vfill\supereject}
+
+% @exdent text....
+% outputs text on separate line in roman font, starting at standard page margin
+
+% This records the amount of indent in the innermost environment.
+% That's how much \exdent should take out.
+\newskip\exdentamount
+
+% This defn is used inside fill environments such as @defun.
+\parseargdef\exdent{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break}
+
+% This defn is used inside nofill environments such as @example.
+\parseargdef\nofillexdent{{\advance \leftskip by -\exdentamount
+  \leftline{\hskip\leftskip{\rm#1}}}}
+
+% @inmargin{WHICH}{TEXT} puts TEXT in the WHICH margin next to the current
+% paragraph.  For more general purposes, use the \margin insertion
+% class.  WHICH is `l' or `r'.  Not documented, written for gawk manual.
+%
+\newskip\inmarginspacing \inmarginspacing=1cm
+\def\strutdepth{\dp\strutbox}
+%
+% \doinmargin#1#2: #1 is compared with the letter l; if it matches, #2
+% is set with \llap, otherwise with \rlap at a distance of \hsize.
+\def\doinmargin#1#2{\strut\vadjust{%
+  \nobreak
+  \kern-\strutdepth
+  \vtop to \strutdepth{%
+    \baselineskip=\strutdepth
+    \vss
+    % if you have multiple lines of stuff to put here, you'll need to
+    % make the vbox yourself of the appropriate size.
+    \ifx#1l%
+      \llap{\ignorespaces #2\hskip\inmarginspacing}%
+    \else
+      \rlap{\hskip\hsize \hskip\inmarginspacing \ignorespaces #2}%
+    \fi
+    \null
+  }%
+}}
+\def\inleftmargin{\doinmargin l}
+\def\inrightmargin{\doinmargin r}
+%
+% @inmargin{TEXT [, RIGHT-TEXT]}
+% (if RIGHT-TEXT is given, use TEXT for left page, RIGHT-TEXT for right;
+% else use TEXT for both).
+%
+\def\inmargin#1{\parseinmargin #1,,\finish}
+\def\parseinmargin#1,#2,#3\finish{% not perfect, but better than nothing.
+  \setbox0 = \hbox{\ignorespaces #2}%
+  \ifdim\wd0 > 0pt
+    \def\lefttext{#1}%		have both texts
+    \def\righttext{#2}%
+  \else
+    \def\lefttext{#1}%		have only one text
+    \def\righttext{#1}%
+  \fi
+  %
+  \ifodd\pageno
+    \def\temp{\inrightmargin\righttext}% odd page -> outside is right margin
+  \else
+    \def\temp{\inleftmargin\lefttext}%
+  \fi
+  \temp
+}
+
+% @include FILE -- \input text of FILE.
+%
+% The file name is read with the catcodes set by \filenamecatcodes.
+% \thisfile is saved on the file-name stack before the file is read and
+% restored by \popthisfilestack afterwards.
+%
+\def\include{\parseargusing\filenamecatcodes\includezzz}
+\def\includezzz#1{%
+  \pushthisfilestack
+  \def\thisfile{#1}%
+  {%
+    \makevalueexpandable  % we want to expand any @value in FILE.
+    \turnoffactive        % and allow special characters in the expansion
+    \indexnofonts         % Allow `@@' and other weird things in file names.
+    \wlog{texinfo.tex: doing @include of #1^^J}%
+    \edef\temp{\noexpand\input #1 }%
+    %
+    % This trickery is to read FILE outside of a group, in case it makes
+    % definitions, etc.
+    \expandafter
+  }\temp
+  \popthisfilestack
+}
+\def\filenamecatcodes{%
+  \catcode`\\=\other
+  \catcode`~=\other
+  \catcode`^=\other
+  \catcode`_=\other
+  \catcode`|=\other
+  \catcode`<=\other
+  \catcode`>=\other
+  \catcode`+=\other
+  \catcode`-=\other
+  \catcode`\`=\other
+  \catcode`\'=\other
+}
+
+% The stack of file names lives in the definition of \popthisfilestack
+% itself: pushing redefines \popthisfilestack (globally) so that it
+% restores the current \thisfile and then reinstates its own previous
+% definition.
+\def\pushthisfilestack{%
+  \expandafter\pushthisfilestackX\popthisfilestack\StackTerm
+}
+\def\pushthisfilestackX{%
+  \expandafter\pushthisfilestackY\thisfile\StackTerm
+}
+\def\pushthisfilestackY #1\StackTerm #2\StackTerm {%
+  \gdef\popthisfilestack{\gdef\thisfile{#1}\gdef\popthisfilestack{#2}}%
+}
+
+\def\popthisfilestack{\errthisfilestackempty}
+\def\errthisfilestackempty{\errmessage{Internal error:
+  the stack of filenames is empty.}}
+%
+\def\thisfile{}
+
+% @center line
+% outputs that line, centered.
+%
+% \centerH is used when TeX is in horizontal mode, \centerV otherwise.
+%
+\parseargdef\center{%
+  \ifhmode
+    \let\centersub\centerH
+  \else
+    \let\centersub\centerV
+  \fi
+  \centersub{\hfil \ignorespaces#1\unskip \hfil}%
+  \let\centersub\relax % don't let the definition persist, just in case
+}
+\def\centerH#1{{%
+  \hfil\break
+  \advance\hsize by -\leftskip
+  \advance\hsize by -\rightskip
+  \line{#1}%
+  \break
+}}
+%
+\newcount\centerpenalty
+\def\centerV#1{%
+  % The idea here is the same as in \startdefun, \cartouche, etc.: if
+  % @center is the first thing after a section heading, we need to wipe
+  % out the negative parskip inserted by \sectionheading, but still
+  % prevent a page break here.
+  \centerpenalty = \lastpenalty
+  \ifnum\centerpenalty>10000 \vskip\parskip \fi
+  \ifnum\centerpenalty>9999 \penalty\centerpenalty \fi
+  \line{\kern\leftskip #1\kern\rightskip}%
+}
+
+% @sp n   outputs n lines of vertical space
+%
+\parseargdef\sp{\vskip #1\baselineskip}
+
+% @comment ...line which is ignored...
+% @c is the same as @comment
+% @ignore ...
@end ignore  is another way to write a comment
+%
+% Both commands make ^^M active and @, { and } ordinary inside a group,
+% then read up to the end of the line as a delimited argument and
+% discard it.  \commentxxx additionally inspects the following token
+% and runs \comment again if that token is \aftermacro.
+%
+% Every line inside the two groups below ends with a percent sign (or
+% is the closing brace): ^^M is active there, so an unprotected line
+% end would become a token of the definition.
+%
+\def\comment{\begingroup \catcode`\^^M=\active%
+\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other\commentxxx}%
+
+{\catcode`\^^M=\active%
+\gdef\commentxxx#1^^M{\endgroup%
+\futurelet\nexttoken\commentxxxx}%
+\gdef\commentxxxx{\ifx\nexttoken\aftermacro\expandafter\comment\fi}%
+}
+
+\def\c{\begingroup \catcode`\^^M=\active%
+\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other%
+\cxxx}
+{\catcode`\^^M=\active \gdef\cxxx#1^^M{\endgroup}}
+% See comment in \scanmacro about why the definitions of @c and @comment differ
+
+% @paragraphindent NCHARS
+% We'll use ems for NCHARS, close enough.
+% NCHARS can also be the word `asis' or `none'.
+% We cannot feasibly implement @paragraphindent asis, though.
+%
+\def\asisword{asis} % no translation, these are keywords
+\def\noneword{none}
+%
+% `asis' leaves \defaultparindent alone, `none' sets it to 0pt, anything
+% else is used as a number of ems.  \parindent is then set from
+% \defaultparindent in every case.
+\parseargdef\paragraphindent{%
+  \def\temp{#1}%
+  \ifx\temp\asisword
+  \else
+    \ifx\temp\noneword
+      \defaultparindent = 0pt
+    \else
+      \defaultparindent = #1em
+    \fi
+  \fi
+  \parindent = \defaultparindent
+}
+
+% @exampleindent NCHARS
+% We'll use ems for NCHARS like @paragraphindent.
+% It seems @exampleindent asis isn't necessary, but
+% I preserve it to make it similar to @paragraphindent.
+\parseargdef\exampleindent{%
+  \def\temp{#1}%
+  \ifx\temp\asisword
+  \else
+    \ifx\temp\noneword
+      \lispnarrowing = 0pt
+    \else
+      \lispnarrowing = #1em
+    \fi
+  \fi
+}
+
+% @firstparagraphindent WORD
+% If WORD is `none', then suppress indentation of the first paragraph
+% after a section heading.  If WORD is `insert', then do indent at such
+% paragraphs.
+%
+% The paragraph indentation is suppressed or not by calling
+% \suppressfirstparagraphindent, which the sectioning commands do.
+% We switch the definition of this back and forth according to WORD.
+% By default, we suppress indentation.
+%
+\def\suppressfirstparagraphindent{\dosuppressfirstparagraphindent}
+\def\insertword{insert}
+%
+% Any WORD other than `none' or `insert' raises an error.
+\parseargdef\firstparagraphindent{%
+  \def\temp{#1}%
+  \ifx\temp\noneword
+    \let\suppressfirstparagraphindent = \dosuppressfirstparagraphindent
+  \else\ifx\temp\insertword
+    \let\suppressfirstparagraphindent = \relax
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @firstparagraphindent option `\temp'}%
+  \fi\fi
+}
+
+% Here is how we actually suppress indentation.  Redefine \everypar to
+% \kern backwards by \parindent, and then reset itself to empty.
+%
+% We also make \indent itself not actually do anything until the next
+% paragraph.
+%
+\gdef\dosuppressfirstparagraphindent{%
+  \gdef\indent  {\restorefirstparagraphindent \indent}%
+  \gdef\noindent{\restorefirstparagraphindent \noindent}%
+  \global\everypar = {\kern -\parindent \restorefirstparagraphindent}%
+}
+%
+\gdef\restorefirstparagraphindent{%
+  \global\let\indent = \ptexindent
+  \global\let\noindent = \ptexnoindent
+  \global\everypar = {}%
+}
+
+
+% @refill is a no-op.
+\let\refill=\relax
+
+% @setfilename INFO-FILENAME - ignored
+\let\setfilename=\comment
+
+% @bye.
+\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend}
+
+
+\message{pdf,}
+% adobe `portable' document format
+\newcount\tempnum
+\newcount\lnkcount
+\newtoks\filename
+\newcount\filenamelength
+\newcount\pgn
+\newtoks\toksA
+\newtoks\toksB
+\newtoks\toksC
+\newtoks\toksD
+\newbox\boxA
+\newbox\boxB
+\newcount\countA
+\newif\ifpdf
+\newif\ifpdfmakepagedest
+
+%
+% For LuaTeX
+%
+
+\newif\iftxiuseunicodedestname
+\txiuseunicodedestnamefalse % For pdfTeX etc.
+
+\ifx\luatexversion\thisisundefined
+\else
+  % Use Unicode destination names
+  \txiuseunicodedestnametrue
+  % Escape PDF strings with converting UTF-16 from UTF-8
+  %
+  % UTF16oct(str) prints the byte order mark (octal 376 377) and then
+  % every code point of STR as backslash-octal escaped bytes: one 16-bit
+  % unit for code points below 0x10000, a surrogate pair otherwise.
+  %
+  % Every quotient is wrapped in math.floor.  With Lua 5.3 the `/'
+  % operator is float division, and string.format refuses a float that
+  % has no integer representation for the o conversion, so a bare
+  % c / 256 raises an error for nearly every character.  math.floor
+  % yields the intended high byte with older Lua versions as well.
+  %
+  % The backslash and the percent sign of the format string are built
+  % with string.char (0x5c and 0x25).  The percent character is given
+  % catcode 12 inside the group so that the Lua modulo operator is not
+  % read as the start of a TeX comment; hence no TeX comment may appear
+  % between the \catcode line and the matching \endgroup.  Lua comments
+  % must be avoided too: \directlua sees the whole chunk as one line.
+  \begingroup
+    \catcode`\%=12
+    \directlua{
+      function UTF16oct(str)
+        tex.sprint(string.char(0x5c) .. '376' .. string.char(0x5c) .. '377')
+        for c in string.utfvalues(str) do
+          if c < 0x10000 then
+            tex.sprint(
+              string.format(string.char(0x5c) .. string.char(0x25) .. '03o' ..
+                            string.char(0x5c) .. string.char(0x25) .. '03o',
+                            math.floor(c / 256), (c % 256)))
+          else
+            c = c - 0x10000
+            local c_hi = math.floor(c / 1024) + 0xd800
+            local c_lo = c % 1024 + 0xdc00
+            tex.sprint(
+              string.format(string.char(0x5c) .. string.char(0x25) .. '03o' ..
+                            string.char(0x5c) .. string.char(0x25) .. '03o' ..
+                            string.char(0x5c) .. string.char(0x25) .. '03o' ..
+                            string.char(0x5c) .. string.char(0x25) .. '03o',
+                            math.floor(c_hi / 256), (c_hi % 256),
+                            math.floor(c_lo / 256), (c_lo % 256)))
+          end
+        end
+      end
+    }
+  \endgroup
+  \def\pdfescapestrutfsixteen#1{\directlua{UTF16oct('\luaescapestring{#1}')}}
+  % Escape PDF strings without converting
+  %
+  % PDFescstr(str) prints STR byte by byte: bytes up to 0x20, bytes from
+  % 0x80 upwards, both parentheses and the backslash are written as a
+  % backslash followed by three octal digits; all other bytes are
+  % printed unchanged.
+  \begingroup
+    \directlua{
+      function PDFescstr(str)
+        for c in string.bytes(str) do
+          if c <= 0x20 or c >= 0x80 or c == 0x28 or c == 0x29 or c == 0x5c then
+            tex.sprint(
+              string.format(string.char(0x5c) .. string.char(0x25) .. '03o',
+                            c))
+          else
+            tex.sprint(string.char(c))
+          end
+        end
+      end
+    }
+  \endgroup
+  \def\pdfescapestring#1{\directlua{PDFescstr('\luaescapestring{#1}')}}
+  \ifnum\luatexversion>84
+    % For LuaTeX >= 0.85
+    % Map the pdfTeX-style names used by the rest of this file onto the
+    % names these LuaTeX versions provide.
+    \def\pdfdest{\pdfextension dest}
+    \let\pdfoutput\outputmode
+    \def\pdfliteral{\pdfextension literal}
+    \def\pdfcatalog{\pdfextension catalog}
+    \def\pdftexversion{\numexpr\pdffeedback version\relax}
+    \let\pdfximage\saveimageresource
+    \let\pdfrefximage\useimageresource
+    \let\pdflastximage\lastsavedimageresourceindex
+    \def\pdfendlink{\pdfextension endlink\relax}
+    \def\pdfoutline{\pdfextension outline}
+    \def\pdfstartlink{\pdfextension startlink}
+    \def\pdffontattr{\pdfextension fontattr}
+    \def\pdfobj{\pdfextension obj}
+    \def\pdflastobj{\numexpr\pdffeedback lastobj\relax}
+    \let\pdfpagewidth\pagewidth
+    \let\pdfpageheight\pageheight
+    \edef\pdfhorigin{\pdfvariable horigin}
+    \edef\pdfvorigin{\pdfvariable vorigin}
+  \fi
+\fi
+
+% when pdftex is run in dvi mode, \pdfoutput is defined
(so \pdfoutput=1
+% can be set).  So we test for \relax and 0 as well as being undefined.
+% Only a \pdfoutput value other than 0 (the first \ifcase branch, left
+% empty) sets \pdftrue.
+\ifx\pdfoutput\thisisundefined
+\else
+  \ifx\pdfoutput\relax
+  \else
+    \ifcase\pdfoutput
+    \else
+      \pdftrue
+    \fi
+  \fi
+\fi
+
+% PDF uses PostScript string constants for the names of xref targets,
+% for display in the outlines, and in other places.  Thus, we have to
+% double any backslashes.  Otherwise, a name like "\node" will be
+% interpreted as a newline (\n), followed by o, d, e.  Not good.
+%
+% See http://www.ntg.nl/pipermail/ntg-pdftex/2004-July/000654.html and
+% related messages.  The final outcome is that it is up to the TeX user
+% to double the backslashes and otherwise make the string valid, so
+% that's what we do.  pdftex 1.30.0 (ca.2005) introduced a primitive to
+% do this reliably, so we use it.
+
+% #1 is a control sequence in which to do the replacements,
+% which we \xdef.
+\def\txiescapepdf#1{%
+  \ifx\pdfescapestring\thisisundefined
+    % No primitive available; should we give a warning or log?
+    % Many times it won't matter.
+    \xdef#1{#1}%
+  \else
+    % The expandable \pdfescapestring primitive escapes parentheses,
+    % backslashes, and other special chars.
+    \xdef#1{\pdfescapestring{#1}}%
+  \fi
+}
+% Same interface as \txiescapepdf, but uses \pdfescapestrutfsixteen when
+% that macro is defined and falls back to \txiescapepdf otherwise.
+\def\txiescapepdfutfsixteen#1{%
+  \ifx\pdfescapestrutfsixteen\thisisundefined
+    % No UTF-16 converting macro available.
+    \txiescapepdf{#1}%
+  \else
+    \xdef#1{\pdfescapestrutfsixteen{#1}}%
+  \fi
+}
+
+\newhelp\nopdfimagehelp{Texinfo supports .png, .jpg, .jpeg, and .pdf images
+with PDF output, and none of those formats could be found.  (.eps cannot
+be supported due to the design of the PDF format; use regular TeX (DVI
+output) for that.)}
+
+\ifpdf
+  %
+  % Color manipulation macros using ideas from pdfcolor.tex,
+  % except using rgb instead of cmyk; the latter is said to render as a
+  % very dark gray on-screen and a very dark halftone in print, instead
+  % of actual black.
% The dark red here is dark enough to print on paper as
  % nearly black, but still distinguishable for online viewing.  We use
  % black by default, though.
  \def\rgbDarkRed{0.50 0.09 0.12}
  \def\rgbBlack{0 0 0}
  %
  % rg sets the color for filling (usual text, etc.);
  % RG sets the color for stroking (thin rules, e.g., normal _'s).
  \def\pdfsetcolor#1{\pdfliteral{#1 rg #1 RG}}
  %
  % Set color, and create a mark which defines \thiscolor accordingly,
  % so that \makeheadline knows which color to restore.
  \def\setcolor#1{%
    \xdef\lastcolordefs{\gdef\noexpand\thiscolor{#1}}%
    \domark
    \pdfsetcolor{#1}%
  }
  %
  % Initial state: everything is typeset in \maincolor, and no color
  % change has been recorded yet (\lastcolordefs is empty).
  \def\maincolor{\rgbBlack}
  \pdfsetcolor{\maincolor}
  \edef\thiscolor{\maincolor}
  \def\lastcolordefs{}
  %
  % The footline is always typeset in \maincolor.
  \def\makefootline{%
    \baselineskip24pt
    \line{\pdfsetcolor{\maincolor}\the\footline}%
  }
  %
  \def\makeheadline{%
    \vbox to 0pt{%
      \vskip-22.5pt
      \line{%
        \vbox to8.5pt{}%
        % Extract \thiscolor definition from the marks.
        \getcolormarks
        % Typeset the headline with \maincolor, then restore the color.
        \pdfsetcolor{\maincolor}\the\headline\pdfsetcolor{\thiscolor}%
      }%
      \vss
    }%
    \nointerlineskip
  }
  %
  %
  \pdfcatalog{/PageMode /UseOutlines}
  %
  % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto).
  %
  % Boxes 0 and 2 are set only so that their widths can be tested further
  % down: a nonzero width means a width (resp. height) was actually given.
  \def\dopdfimage#1#2#3{%
    \def\pdfimagewidth{#2}\setbox0 = \hbox{\ignorespaces #2}%
    \def\pdfimageheight{#3}\setbox2 = \hbox{\ignorespaces #3}%
    %
    % pdftex (and the PDF format) support .pdf, .png, .jpg (among
    % others).  Let's try in that order, PDF first since if
    % someone has a scalable image, presumably better to use that than a
    % bitmap.
    %
    % Each extension is probed with \openin; the first one that can be
    % opened is recorded globally in \pdfimgext (the probing itself is
    % done inside a group).  If none opens, an error is raised.
    \let\pdfimgext=\empty
    \begingroup
      \openin 1 #1.pdf \ifeof 1
        \openin 1 #1.PDF \ifeof 1
          \openin 1 #1.png \ifeof 1
            \openin 1 #1.jpg \ifeof 1
              \openin 1 #1.jpeg \ifeof 1
                \openin 1 #1.JPG \ifeof 1
                  \errhelp = \nopdfimagehelp
                  \errmessage{Could not find image file #1 for pdf}%
                \else \gdef\pdfimgext{JPG}%
                \fi
              \else \gdef\pdfimgext{jpeg}%
              \fi
            \else \gdef\pdfimgext{jpg}%
            \fi
          \else \gdef\pdfimgext{png}%
          \fi
        \else \gdef\pdfimgext{PDF}%
        \fi
      \else \gdef\pdfimgext{pdf}%
      \fi
      \closein 1
    \endgroup
    %
    % without \immediate, ancient pdftex seg faults when the same image is
    % included twice.  (Version 3.14159-pre-1.0-unofficial-20010704.)
    \ifnum\pdftexversion < 14
      \immediate\pdfimage
    \else
      \immediate\pdfximage
    \fi
      \ifdim \wd0 >0pt width \pdfimagewidth \fi
      \ifdim \wd2 >0pt height \pdfimageheight \fi
      \ifnum\pdftexversion<13
         #1.\pdfimgext
       \else
         {#1.\pdfimgext}%
       \fi
    \ifnum\pdftexversion < 14 \else
      \pdfrefximage \pdflastximage
    \fi}
  %
  % Store the escaped form of #1 in \pdfdestname.  All the work happens
  % inside a group, so the dummy definitions set up here do not leak.
  % NOTE(review): \txiescapepdf is defined elsewhere; presumably it
  % assigns its result globally, otherwise \pdfdestname would not survive
  % the closing brace -- confirm.
  \def\setpdfdestname#1{{%
    % We have to set dummies so commands such as @code, and characters
    % such as \, aren't expanded when present in a section title.
    \indexnofonts
    \makevalueexpandable
    \turnoffactive
    \iftxiuseunicodedestname
      \ifx \declaredencoding \latone
        % Pass through Latin-1 characters.
        %   LuaTeX with byte wise I/O converts Latin-1 characters to Unicode.
      \else
        \ifx \declaredencoding \utfeight
          % Pass through Unicode characters.
        \else
          % Use ASCII approximations in destination names.
          \passthroughcharsfalse
        \fi
      \fi
    \else
      % Use ASCII approximations in destination names.
      \passthroughcharsfalse
    \fi
    \def\pdfdestname{#1}%
    \txiescapepdf\pdfdestname
  }}
  %
  % Store the outline (bookmark) text for #1 in \pdfoutlinetext, passing
  % characters through or approximating them in ASCII depending on the
  % declared encoding and on the engine.
  \def\setpdfoutlinetext#1{{%
    \indexnofonts
    \makevalueexpandable
    \turnoffactive
    \ifx \declaredencoding \latone
      % The PDF format can use an extended form of Latin-1 in bookmark
      % strings.  See Appendix D of the PDF Reference, Sixth Edition, for
      % the "PDFDocEncoding".
      \passthroughcharstrue
      % Pass through Latin-1 characters.
      %   LuaTeX: Convert to Unicode
      %   pdfTeX: Use Latin-1 as PDFDocEncoding
      \def\pdfoutlinetext{#1}%
    \else
      \ifx \declaredencoding \utfeight
        \ifx\luatexversion\thisisundefined
          % For pdfTeX with UTF-8.
          % TODO: the PDF format can use UTF-16 in bookmark strings,
          % but the code for this isn't done yet.
          % Use ASCII approximations.
          \passthroughcharsfalse
          \def\pdfoutlinetext{#1}%
        \else
          % For LuaTeX with UTF-8.
          % Pass through Unicode characters for title texts.
          \passthroughcharstrue
          \def\pdfoutlinetext{#1}%
        \fi
      \else
        % For non-Latin-1 or non-UTF-8 encodings.
        % Use ASCII approximations.
        \passthroughcharsfalse
        \def\pdfoutlinetext{#1}%
      \fi
    \fi
    % LuaTeX: Convert to UTF-16
    % pdfTeX: Use Latin-1 as PDFDocEncoding
    \txiescapepdfutfsixteen\pdfoutlinetext
  }}
  %
  % Create a named PDF destination for #1 at the current position.
  \def\pdfmkdest#1{%
    \setpdfdestname{#1}%
    \safewhatsit{\pdfdest name{\pdfdestname} xyz}%
  }
  %
  % used to mark target names; must be expandable.
  \def\pdfmkpgn#1{#1}
  %
  % by default, use black for everything.
  \def\urlcolor{\rgbBlack}
  \def\linkcolor{\rgbBlack}
  \def\endlink{\setcolor{\maincolor}\pdfendlink}
  %
  % Adding outlines to PDF; macros for calculating structure of outlines
  % come from Petr Olsak
  %
  % \expnumber{NAME} expands to the value stored in the control sequence
  % called NAME, or to 0 if that control sequence is not defined yet.
  \def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0%
    \else \csname#1\endcsname \fi}
  % \advancenumber{NAME} globally increments that stored value by one.
  \def\advancenumber#1{\tempnum=\expnumber{#1}\relax
    \advance\tempnum by 1
    \expandafter\xdef\csname#1\endcsname{\the\tempnum}}
  %
  % #1 is the section text, which is what will be displayed in the
  % outline by the pdf viewer.  #2 is the pdf expression for the number
  % of subentries (or empty, for subsubsections).  #3 is the node text,
  % which might be empty if this toc entry had no corresponding node.
  % #4 is the page number
  %
  \def\dopdfoutline#1#2#3#4{%
    % Generate a link to the node text if that exists; else, use the
    % page number.  We could generate a destination for the section
    % text in the case where a section has no node, but it doesn't
    % seem worth the trouble, since most documents are normally structured.
    \setpdfoutlinetext{#1}
    \setpdfdestname{#3}
    \ifx\pdfdestname\empty
      \def\pdfdestname{#4}%
    \fi
    %
    \pdfoutline goto name{\pdfmkpgn{\pdfdestname}}#2{\pdfoutlinetext}%
  }
  %
  % Build the PDF outline tree from the toc data file.  The file is read
  % twice: once to count the subentries of every entry, and once to emit
  % the \pdfoutline items using those counts.
  \def\pdfmakeoutlines{%
    \begingroup
      % Read toc silently, to get counts of subentries for \pdfoutline.
      \def\partentry##1##2##3##4{}% ignore parts in the outlines
      \def\numchapentry##1##2##3##4{%
        \def\thischapnum{##2}%
        \def\thissecnum{0}%
        \def\thissubsecnum{0}%
      }%
      \def\numsecentry##1##2##3##4{%
        \advancenumber{chap\thischapnum}%
        \def\thissecnum{##2}%
        \def\thissubsecnum{0}%
      }%
      \def\numsubsecentry##1##2##3##4{%
        \advancenumber{sec\thissecnum}%
        \def\thissubsecnum{##2}%
      }%
      \def\numsubsubsecentry##1##2##3##4{%
        \advancenumber{subsec\thissubsecnum}%
      }%
      \def\thischapnum{0}%
      \def\thissecnum{0}%
      \def\thissubsecnum{0}%
      %
      % use \def rather than \let here because we redefine \chapentry et
      % al. a second time, below.
      \def\appentry{\numchapentry}%
      \def\appsecentry{\numsecentry}%
      \def\appsubsecentry{\numsubsecentry}%
      \def\appsubsubsecentry{\numsubsubsecentry}%
      \def\unnchapentry{\numchapentry}%
      \def\unnsecentry{\numsecentry}%
      \def\unnsubsecentry{\numsubsecentry}%
      \def\unnsubsubsecentry{\numsubsubsecentry}%
      \readdatafile{toc}%
      %
      % Read toc second time, this time actually producing the outlines.
      % The `-' means take the \expnumber as the absolute number of
      % subentries, which we calculated on our first read of the .toc above.
      %
      % We use the node names as the destinations.
      \def\numchapentry##1##2##3##4{%
        \dopdfoutline{##1}{count-\expnumber{chap##2}}{##3}{##4}}%
      \def\numsecentry##1##2##3##4{%
        \dopdfoutline{##1}{count-\expnumber{sec##2}}{##3}{##4}}%
      \def\numsubsecentry##1##2##3##4{%
        \dopdfoutline{##1}{count-\expnumber{subsec##2}}{##3}{##4}}%
      \def\numsubsubsecentry##1##2##3##4{% count is always zero
        \dopdfoutline{##1}{}{##3}{##4}}%
      %
      % PDF outlines are displayed using system fonts, instead of
      % document fonts.  Therefore we cannot use special characters,
      % since the encoding is unknown.  For example, the eogonek from
      % Latin 2 (0xea) gets translated to a | character.  Info from
      % Staszek Wawrykiewicz, 19 Jan 2004 04:09:24 +0100.
      %
      % To do this right, we would have to translate 8-bit characters to
      % their "best" equivalent, based on the @documentencoding.  Too
      % much work for too little return.  Just use the ASCII equivalents
      % we use for the index sort strings.
      %
      \indexnofonts
      \setupdatafile
      % We can have normal brace characters in the PDF outlines, unlike
      % Texinfo index files.  So set that up.
      \def\{{\lbracecharliteral}%
      \def\}{\rbracecharliteral}%
      \catcode`\\=\active \otherbackslash
      \input \tocreadfilename
    \endgroup
  }
  % Define \lbracecharliteral and \rbracecharliteral as literal brace
  % characters.  Within this group [ and ] do the grouping, while the
  % braces themselves are given category "other".
  {\catcode`[=1 \catcode`]=2
   \catcode`{=\other \catcode`}=\other
   \gdef\lbracecharliteral[{]%
   \gdef\rbracecharliteral[}]%
  ]
  %
  % Append the tokens of the argument text to \filename one at a time,
  % counting them in \filenamelength, until the `|' sentinel is reached.
  \def\skipspaces#1{\def\PP{#1}\def\D{|}%
    \ifx\PP\D\let\nextsp\relax
    \else\let\nextsp\skipspaces
      \addtokens{\filename}{\PP}%
      \advance\filenamelength by 1
    \fi
    \nextsp}
  \def\getfilename#1{%
    \filenamelength=0
    % If we don't expand the argument now, \skipspaces will get
    % snagged on things like "@value{foo}".
    \edef\temp{#1}%
    \expandafter\skipspaces\temp|\relax
  }
  % Pick the link-start primitive according to the pdfTeX version.
  \ifnum\pdftexversion < 14
    \let \startlink \pdfannotlink
  \else
    \let \startlink \pdfstartlink
  \fi
  % make a live url in pdf output.
% \pdfurl{URL} starts a URI link annotation in \urlcolor; the caller is
% expected to close it (see \endlink).
  \def\pdfurl#1{%
    \begingroup
      % it seems we really need yet another set of dummies; have not
      % tried to figure out what each command should do in the context
      % of @url.  for now, just make @/ a no-op, that's the only one
      % people have actually reported a problem with.
      %
      \normalturnoffactive
      \def\@{@}%
      \let\/=\empty
      \makevalueexpandable
      % do we want to go so far as to use \indexnofonts instead of just
      % special-casing \var here?
      \def\var##1{##1}%
      %
      \leavevmode\setcolor{\urlcolor}%
      \startlink attr{/Border [0 0 0]}%
        user{/Subtype /Link /A << /S /URI /URI (#1) >>}%
    \endgroup}
  %
  % Token scanner that turns runs of digits into \pdflink items.
  %
  % \pdfgettoks TEXT. loads TEXT (with its terminating period) into
  % \toksA, clears \toksB and starts \maketoks.  \maketoks pops one token
  % at a time off \toksA (\poptoks):
  %   - a digit is appended to \toksC and \countA is set to 1 (\adn);
  %   - any other token first flushes pending digits as
  %     \pdflink{DIGITS} into \toksB (\makelink), and is then copied to
  %     \toksB itself, followed by a space if it is a comma;
  %   - a period ends the scan: \done copies \toksB into \toksA globally.
  \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}}
  % \addtokens{\toks}{TEXT}: append TEXT (expanded by \edef) to \toks.
  \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks}
  \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks}
  % Split off the first token: \first and \toksD get it, \toksA the rest.
  \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}}
  \def\maketoks{%
    \expandafter\poptoks\the\toksA|ENDTOKS|\relax
    \ifx\first0\adn0
    \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3
    \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6
    \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9
    \else
      \ifnum0=\countA\else\makelink\fi
      \ifx\first.\let\next=\done\else
        \let\next=\maketoks
        \addtokens{\toksB}{\the\toksD}
        \ifx\first,\addtokens{\toksB}{\space}\fi
      \fi
    \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
    \next}
  \def\makelink{\addtokens{\toksB}%
    {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0}
  % \pdflink{NAME}: typeset NAME in \linkcolor as a link to the named
  % destination NAME.
  \def\pdflink#1{%
    \startlink attr{/Border [0 0 0]} goto name{\pdfmkpgn{#1}}
    \setcolor{\linkcolor}#1\endlink}
  \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st}
\else
  % non-pdf mode
  % The PDF hooks become no-ops: those taking an argument swallow it.
  \let\pdfmkdest = \gobble
  \let\pdfurl = \gobble
  \let\endlink = \relax
  \let\setcolor = \gobble
  \let\pdfsetcolor = \gobble
  \let\pdfmakeoutlines = \relax
\fi  % \ifx\pdfoutput

%
% For XeTeX
%
\ifx\XeTeXrevision\thisisundefined
\else
  %
  % XeTeX version check
  %
% Unicode destination names are enabled only when the version string
% compares greater than or equal to 0.99996.
% NOTE(review): \strcmp compares \the\XeTeXversion\XeTeXrevision as a
% string, not as a number -- confirm this orders future versions correctly.
  \ifnum\strcmp{\the\XeTeXversion\XeTeXrevision}{0.99996}>-1
    % TeX Live 2016 contains XeTeX 0.99996 and xdvipdfmx 20160307.
    % It can use the `dvipdfmx:config' special (from TeX Live SVN r40941).
    % For avoiding PDF destination name replacement, we use this special
    % instead of xdvipdfmx's command line option `-C 0x0010'.
    \special{dvipdfmx:config C 0x0010}
    % XeTeX 0.99995+ comes with xdvipdfmx 20160307+.
    % It can handle Unicode destination names for PDF.
    \txiuseunicodedestnametrue
  \else
    % XeTeX < 0.99996 (TeX Live < 2016) cannot use the
    % `dvipdfmx:config' special.
    % So for avoiding PDF destination name replacement,
    % xdvipdfmx's command line option `-C 0x0010' is necessary.
    %
    % XeTeX < 0.99995 can not handle Unicode destination names for PDF
    % because xdvipdfmx 20150315 has a UTF-16 conversion issue.
    % It is fixed by xdvipdfmx 20160106 (TeX Live SVN r39753).
    \txiuseunicodedestnamefalse
  \fi
  %
  % Color support
  %
  \def\rgbDarkRed{0.50 0.09 0.12}
  \def\rgbBlack{0 0 0}
  %
  % Colors are set through a pdf:scolor special.
  \def\pdfsetcolor#1{\special{pdf:scolor [#1]}}
  %
  % Set color, and create a mark which defines \thiscolor accordingly,
  % so that \makeheadline knows which color to restore.
  \def\setcolor#1{%
    \xdef\lastcolordefs{\gdef\noexpand\thiscolor{#1}}%
    \domark
    \pdfsetcolor{#1}%
  }
  %
  \def\maincolor{\rgbBlack}
  \pdfsetcolor{\maincolor}
  \edef\thiscolor{\maincolor}
  \def\lastcolordefs{}
  %
  \def\makefootline{%
    \baselineskip24pt
    \line{\pdfsetcolor{\maincolor}\the\footline}%
  }
  %
  \def\makeheadline{%
    \vbox to 0pt{%
      \vskip-22.5pt
      \line{%
        \vbox to8.5pt{}%
        % Extract \thiscolor definition from the marks.
        \getcolormarks
        % Typeset the headline with \maincolor, then restore the color.
        \pdfsetcolor{\maincolor}\the\headline\pdfsetcolor{\thiscolor}%
      }%
      \vss
    }%
    \nointerlineskip
  }
  %
  % PDF outline support
  %
  % Emulate pdfTeX primitive
  % (\pdfdest name{NAME} xyz becomes a pdf:dest special at the current
  % page position).
  \def\pdfdest name#1 xyz{%
    \special{pdf:dest (#1) [@thispage /XYZ @xpos @ypos null]}%
  }
  %
  \def\setpdfdestname#1{{%
    % We have to set dummies so commands such as @code, and characters
    % such as \, aren't expanded when present in a section title.
    \indexnofonts
    \makevalueexpandable
    \turnoffactive
    \iftxiuseunicodedestname
      % Pass through Unicode characters.
    \else
      % Use ASCII approximations in destination names.
      \passthroughcharsfalse
    \fi
    \def\pdfdestname{#1}%
    \txiescapepdf\pdfdestname
  }}
  %
  \def\setpdfoutlinetext#1{{%
    \turnoffactive
    % Always use Unicode characters in title texts.
    \def\pdfoutlinetext{#1}%
    % For XeTeX, xdvipdfmx converts to UTF-16.
    % So we do not convert.
    \txiescapepdf\pdfoutlinetext
  }}
  %
  \def\pdfmkdest#1{%
    \setpdfdestname{#1}%
    \safewhatsit{\pdfdest name{\pdfdestname} xyz}%
  }
  %
  % by default, use black for everything.
  \def\urlcolor{\rgbBlack}
  \def\linkcolor{\rgbBlack}
  % NOTE(review): \endlink is defined again further down in this XeTeX
  % branch (using \special{pdf:eann}), which supersedes this definition.
  \def\endlink{\setcolor{\maincolor}\pdfendlink}
  %
  % #1 is the outline text, #2 the outline level, #3 the node text (may be
  % empty), #4 the page number, used as destination when #3 is empty.
  \def\dopdfoutline#1#2#3#4{%
    \setpdfoutlinetext{#1}
    \setpdfdestname{#3}
    \ifx\pdfdestname\empty
      \def\pdfdestname{#4}%
    \fi
    %
    \special{pdf:out [-] #2 << /Title (\pdfoutlinetext) /A
      << /S /GoTo /D (\pdfdestname) >> >> }%
  }
  %
  \def\pdfmakeoutlines{%
    \begingroup
      %
      % For XeTeX, counts of subentries are not necessary.
      % Therefore, we read toc only once.
      %
      % We use node names as destinations.
      \def\partentry##1##2##3##4{}% ignore parts in the outlines
      \def\numchapentry##1##2##3##4{%
        \dopdfoutline{##1}{1}{##3}{##4}}%
      \def\numsecentry##1##2##3##4{%
        \dopdfoutline{##1}{2}{##3}{##4}}%
      \def\numsubsecentry##1##2##3##4{%
        \dopdfoutline{##1}{3}{##3}{##4}}%
      \def\numsubsubsecentry##1##2##3##4{%
        \dopdfoutline{##1}{4}{##3}{##4}}%
      %
      % Appendix and unnumbered entries are handled like numbered ones.
      \let\appentry\numchapentry%
      \let\appsecentry\numsecentry%
      \let\appsubsecentry\numsubsecentry%
      \let\appsubsubsecentry\numsubsubsecentry%
      \let\unnchapentry\numchapentry%
      \let\unnsecentry\numsecentry%
      \let\unnsubsecentry\numsubsecentry%
      \let\unnsubsubsecentry\numsubsubsecentry%
      %
      % For XeTeX, xdvipdfmx converts strings to UTF-16.
      % Therefore, the encoding and the language may not be considered.
      %
      \indexnofonts
      \setupdatafile
      % We can have normal brace characters in the PDF outlines, unlike
      % Texinfo index files.  So set that up.
      \def\{{\lbracecharliteral}%
      \def\}{\rbracecharliteral}%
      \catcode`\\=\active \otherbackslash
      \input \tocreadfilename
    \endgroup
  }
  % Literal brace characters: within this group [ and ] do the grouping
  % and the braces have category "other".
  {\catcode`[=1 \catcode`]=2
   \catcode`{=\other \catcode`}=\other
   \gdef\lbracecharliteral[{]%
   \gdef\rbracecharliteral[}]%
  ]

  \special{pdf:docview << /PageMode /UseOutlines >> }
  % ``\special{pdf:tounicode ...}'' is not necessary
  % because xdvipdfmx converts strings from UTF-8 to UTF-16 without it.
  % However, due to a UTF-16 conversion issue of xdvipdfmx 20150315,
  % ``\special{pdf:dest ...}'' cannot handle non-ASCII strings.
  % It is fixed by xdvipdfmx 20160106 (TeX Live SVN r39753).
%
  % Append the tokens of the argument text to \filename one at a time,
  % counting them in \filenamelength, until the `|' sentinel is reached.
  \def\skipspaces#1{\def\PP{#1}\def\D{|}%
    \ifx\PP\D\let\nextsp\relax
    \else\let\nextsp\skipspaces
      \addtokens{\filename}{\PP}%
      \advance\filenamelength by 1
    \fi
    \nextsp}
  \def\getfilename#1{%
    \filenamelength=0
    % If we don't expand the argument now, \skipspaces will get
    % snagged on things like "@value{foo}".
    \edef\temp{#1}%
    \expandafter\skipspaces\temp|\relax
  }
  % make a live url in pdf output.
% \pdfurl{URL} opens a URI link annotation (pdf:bann special) in
% \urlcolor; \endlink below closes it.
  \def\pdfurl#1{%
    \begingroup
      % it seems we really need yet another set of dummies; have not
      % tried to figure out what each command should do in the context
      % of @url.  for now, just make @/ a no-op, that's the only one
      % people have actually reported a problem with.
      %
      \normalturnoffactive
      \def\@{@}%
      \let\/=\empty
      \makevalueexpandable
      % do we want to go so far as to use \indexnofonts instead of just
      % special-casing \var here?
      \def\var##1{##1}%
      %
      \leavevmode\setcolor{\urlcolor}%
      \special{pdf:bann << /Border [0 0 0]
        /Subtype /Link /A << /S /URI /URI (#1) >> >>}%
    \endgroup}
  % Restore \maincolor and close the annotation opened by pdf:bann.
  \def\endlink{\setcolor{\maincolor}\special{pdf:eann}}
  %
  % Token scanner that turns runs of digits into \pdflink items.
  % \pdfgettoks TEXT. loads TEXT (with its terminating period) into
  % \toksA and starts \maketoks, which pops one token at a time:
  % digits accumulate in \toksC (\adn); any other token first flushes the
  % pending digits into \toksB as \pdflink{DIGITS} (\makelink) and is
  % then copied to \toksB itself (plus a space after a comma); a period
  % ends the scan and \done copies \toksB into \toksA globally.
  \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}}
  \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks}
  \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks}
  \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}}
  \def\maketoks{%
    \expandafter\poptoks\the\toksA|ENDTOKS|\relax
    \ifx\first0\adn0
    \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3
    \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6
    \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9
    \else
      \ifnum0=\countA\else\makelink\fi
      \ifx\first.\let\next=\done\else
        \let\next=\maketoks
        \addtokens{\toksB}{\the\toksD}
        \ifx\first,\addtokens{\toksB}{\space}\fi
      \fi
    \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
    \next}
  \def\makelink{\addtokens{\toksB}%
    {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0}
  % \pdflink{NAME}: typeset NAME in \linkcolor as a GoTo link to the
  % destination NAME.
  \def\pdflink#1{%
    \special{pdf:bann << /Border [0 0 0]
      /Type /Annot /Subtype /Link /A << /S /GoTo /D (#1) >> >>}%
    \setcolor{\linkcolor}#1\endlink}
  \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st}
%
  %
  % @image support
  %
  % #1 is image name, #2 width (might be empty/whitespace), #3 height (ditto).
% Boxes 0 and 2 are set only so that their widths can be tested at the
% end: a nonzero width means a width (resp. height) was actually given.
  \def\doxeteximage#1#2#3{%
    \def\xeteximagewidth{#2}\setbox0 = \hbox{\ignorespaces #2}%
    \def\xeteximageheight{#3}\setbox2 = \hbox{\ignorespaces #3}%
    %
    % XeTeX (and the PDF format) supports .pdf, .png, .jpg (among
    % others).  Let's try in that order, PDF first since if
    % someone has a scalable image, presumably better to use that than a
    % bitmap.
    %
    % Each extension is probed with \openin; the first one that can be
    % opened is recorded globally in \xeteximgext.  If none opens, an
    % error is raised.
    \let\xeteximgext=\empty
    \begingroup
      \openin 1 #1.pdf \ifeof 1
        \openin 1 #1.PDF \ifeof 1
          \openin 1 #1.png \ifeof 1
            \openin 1 #1.jpg \ifeof 1
              \openin 1 #1.jpeg \ifeof 1
                \openin 1 #1.JPG \ifeof 1
                  \errmessage{Could not find image file #1 for XeTeX}%
                \else \gdef\xeteximgext{JPG}%
                \fi
              \else \gdef\xeteximgext{jpeg}%
              \fi
            \else \gdef\xeteximgext{jpg}%
            \fi
          \else \gdef\xeteximgext{png}%
          \fi
        \else \gdef\xeteximgext{PDF}%
        \fi
      \else \gdef\xeteximgext{pdf}%
      \fi
      \closein 1
    \endgroup
    %
    % Files found as .pdf or .PDF are included with \XeTeXpdffile,
    % everything else with \XeTeXpicfile.  The optional width/height
    % keywords follow, and \relax ends the specification.
    \def\xetexpdfext{pdf}%
    \ifx\xeteximgext\xetexpdfext
      \XeTeXpdffile "#1".\xeteximgext ""
    \else
      \def\xetexpdfext{PDF}%
      \ifx\xeteximgext\xetexpdfext
        \XeTeXpdffile "#1".\xeteximgext ""
      \else
        \XeTeXpicfile "#1".\xeteximgext ""
      \fi
    \fi
    \ifdim \wd0 >0pt width \xeteximagewidth \fi
    \ifdim \wd2 >0pt height \xeteximageheight \fi \relax
  }
\fi


%
\message{fonts,}

% Set the baselineskip to #1, and the lineskip and strut size
% correspondingly.  There is no deep meaning behind these magic numbers
% used as factors; they just match (closely enough) what Knuth defined.
%
\def\lineskipfactor{.08333}
\def\strutheightpercent{.70833}
\def\strutdepthpercent {.29167}
%
% can get a sort of poor man's double spacing by redefining this.
\def\baselinefactor{1}
%
% \setleading{DIMEN}: set \normalbaselineskip to \baselinefactor times
% DIMEN, derive \normallineskip from it via \lineskipfactor, activate
% both with \normalbaselines, and rebuild \strutbox with the height and
% depth fractions of the new \baselineskip.
\newdimen\textleading
\def\setleading#1{%
  \dimen0 = #1\relax
  \normalbaselineskip = \baselinefactor\dimen0
  \normallineskip = \lineskipfactor\normalbaselineskip
  \normalbaselines
  \setbox\strutbox =\hbox{%
    \vrule width0pt height\strutheightpercent\baselineskip
                    depth \strutdepthpercent \baselineskip
  }%
}

% PDF CMaps.  See also LaTeX's t1.cmap.
%
% do nothing with this by default.
\expandafter\let\csname cmapOT1\endcsname\gobble
\expandafter\let\csname cmapOT1IT\endcsname\gobble
\expandafter\let\csname cmapOT1TT\endcsname\gobble

% if we are producing pdf, and we have \pdffontattr, then define cmaps.
% (\pdffontattr was introduced many years ago, but people still run
% older pdftex's; it's easy to conditionalize, so we do.)
%
% Each CMap below is written verbatim into a PDF stream object.  While
% the stream text is read, the end-of-line character is active and
% expands to ^^J, and % has category 12, so nothing inside the braces of
% \pdfobj stream {...} is treated as a TeX comment.  Do not add comments
% or change line breaks inside those streams.
%
% After each stream, the corresponding \cmapXXX macro is redefined with
% \edef, so the object number in \pdflastobj is frozen at definition
% time; the macro then attaches that object as /ToUnicode to the font
% given as its argument.
\ifpdf \ifx\pdffontattr\thisisundefined \else
  \begingroup
    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: ProcSet (CIDInit)
%%IncludeResource: ProcSet (CIDInit)
%%BeginResource: CMap (TeX-OT1-0)
%%Title: (TeX-OT1-0 TeX OT1 0)
%%Version: 1.000
%%EndComments
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (TeX)
/Ordering (OT1)
/Supplement 0
>> def
/CMapName /TeX-OT1-0 def
/CMapType 2 def
1 begincodespacerange
<00> <7F>
endcodespacerange
8 beginbfrange
<00> <01> <0393>
<09> <0A> <03A8>
<23> <26> <0023>
<28> <3B> <0028>
<3F> <5B> <003F>
<5D> <5E> <005D>
<61> <7A> <0061>
<7B> <7C> <2013>
endbfrange
40 beginbfchar
<02> <0398>
<03> <039B>
<04> <039E>
<05> <03A0>
<06> <03A3>
<07> <03D2>
<08> <03A6>
<0B> <00660066>
<0C> <00660069>
<0D> <0066006C>
<0E> <006600660069>
<0F> <00660066006C>
<10> <0131>
<11> <0237>
<12> <0060>
<13> <00B4>
<14> <02C7>
<15> <02D8>
<16> <00AF>
<17> <02DA>
<18> <00B8>
<19> <00DF>
<1A> <00E6>
<1B> <0153>
<1C> <00F8>
<1D> <00C6>
<1E> <0152>
<1F> <00D8>
<21> <0021>
<22> <201D>
<27> <2019>
<3C> <00A1>
<3D> <003D>
<3E> <00BF>
<5C> <201C>
<5F> <02D9>
<60> <2018>
<7D> <02DD>
<7E> <007E>
<7F> <00A8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%EndResource
%%EOF
    }\endgroup
  \expandafter\edef\csname cmapOT1\endcsname#1{%
    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
  }%
%
% \cmapOT1IT
  \begingroup
    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: ProcSet (CIDInit)
%%IncludeResource: ProcSet (CIDInit)
%%BeginResource: CMap (TeX-OT1IT-0)
%%Title: (TeX-OT1IT-0 TeX OT1IT 0)
%%Version: 1.000
%%EndComments
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (TeX)
/Ordering (OT1IT)
/Supplement 0
>> def
/CMapName /TeX-OT1IT-0 def
/CMapType 2 def
1 begincodespacerange
<00> <7F>
endcodespacerange
8 beginbfrange
<00> <01> <0393>
<09> <0A> <03A8>
<25> <26> <0025>
<28> <3B> <0028>
<3F> <5B> <003F>
<5D> <5E> <005D>
<61> <7A> <0061>
<7B> <7C> <2013>
endbfrange
42 beginbfchar
<02> <0398>
<03> <039B>
<04> <039E>
<05> <03A0>
<06> <03A3>
<07> <03D2>
<08> <03A6>
<0B> <00660066>
<0C> <00660069>
<0D> <0066006C>
<0E> <006600660069>
<0F> <00660066006C>
<10> <0131>
<11> <0237>
<12> <0060>
<13> <00B4>
<14> <02C7>
<15> <02D8>
<16> <00AF>
<17> <02DA>
<18> <00B8>
<19> <00DF>
<1A> <00E6>
<1B> <0153>
<1C> <00F8>
<1D> <00C6>
<1E> <0152>
<1F> <00D8>
<21> <0021>
<22> <201D>
<23> <0023>
<24> <00A3>
<27> <2019>
<3C> <00A1>
<3D> <003D>
<3E> <00BF>
<5C> <201C>
<5F> <02D9>
<60> <2018>
<7D> <02DD>
<7E> <007E>
<7F> <00A8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%EndResource
%%EOF
    }\endgroup
  \expandafter\edef\csname cmapOT1IT\endcsname#1{%
    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
  }%
%
% \cmapOT1TT
  \begingroup
    \catcode`\^^M=\active \def^^M{^^J}% Output line endings as the ^^J char.
    \catcode`\%=12 \immediate\pdfobj stream {%!PS-Adobe-3.0 Resource-CMap
%%DocumentNeededResources: ProcSet (CIDInit)
%%IncludeResource: ProcSet (CIDInit)
%%BeginResource: CMap (TeX-OT1TT-0)
%%Title: (TeX-OT1TT-0 TeX OT1TT 0)
%%Version: 1.000
%%EndComments
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (TeX)
/Ordering (OT1TT)
/Supplement 0
>> def
/CMapName /TeX-OT1TT-0 def
/CMapType 2 def
1 begincodespacerange
<00> <7F>
endcodespacerange
5 beginbfrange
<00> <01> <0393>
<09> <0A> <03A8>
<21> <26> <0021>
<28> <5F> <0028>
<61> <7E> <0061>
endbfrange
32 beginbfchar
<02> <0398>
<03> <039B>
<04> <039E>
<05> <03A0>
<06> <03A3>
<07> <03D2>
<08> <03A6>
<0B> <2191>
<0C> <2193>
<0D> <0027>
<0E> <00A1>
<0F> <00BF>
<10> <0131>
<11> <0237>
<12> <0060>
<13> <00B4>
<14> <02C7>
<15> <02D8>
<16> <00AF>
<17> <02DA>
<18> <00B8>
<19> <00DF>
<1A> <00E6>
<1B> <0153>
<1C> <00F8>
<1D> <00C6>
<1E> <0152>
<1F> <00D8>
<20> <2423>
<27> <2019>
<60> <2018>
<7F> <00A8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%EndResource
%%EOF
    }\endgroup
  \expandafter\edef\csname cmapOT1TT\endcsname#1{%
    \pdffontattr#1{/ToUnicode \the\pdflastobj\space 0 R}%
  }%
\fi\fi


% Set the font macro #1 to the font named \fontprefix#2.
% #3 is the font's design size, #4 is a scale factor, #5 is the CMap
% encoding (only OT1, OT1IT and OT1TT are allowed, or empty to omit).
% Example:
% #1 = \textrm
% #2 = \rmshape
% #3 = 10
% #4 = \mainmagstep
% #5 = OT1
%
\def\setfont#1#2#3#4#5{%
  \font#1=\fontprefix#2#3 scaled #4
  \csname cmap#5\endcsname#1%
}
% This is what gets called when #5 of \setfont is empty.
\let\cmap\gobble
%
% (end of cmaps)

% Use cm as the default font prefix.
% To specify the font prefix, you must define \fontprefix
% before you read in texinfo.tex.
\ifx\fontprefix\thisisundefined
\def\fontprefix{cm}
\fi
% Support font families that don't use the same naming scheme as CM.
% Each \...shape macro holds the part of the font file name that follows
% \fontprefix (see \setfont); the \...bshape variants are the faces used
% where the normal face is bold (titles and headings).
\def\rmshape{r}
\def\rmbshape{bx}               % where the normal face is bold
\def\bfshape{b}
\def\bxshape{bx}
\def\ttshape{tt}
\def\ttbshape{tt}
\def\ttslshape{sltt}
\def\itshape{ti}
\def\itbshape{bxti}
\def\slshape{sl}
\def\slbshape{bxsl}
\def\sfshape{ss}
\def\sfbshape{ss}
\def\scshape{csc}
\def\scbshape{csc}

% Definitions for a main text size of 11pt.  (The default in Texinfo.)
%
% Loads every font used by the document (text, @defun, small, smaller,
% title, chapter, section, subsection and reduced sizes), sets
% \textleading, and finally switches to the text fonts in roman.
% The last argument of each \setfont names the ToUnicode CMap to attach:
% OT1 for roman-encoded faces, OT1IT for text italic, OT1TT for
% typewriter faces.
%
\def\definetextfontsizexi{%
% Text fonts (10.95pt: 10pt design size at \magstephalf).
\def\textnominalsize{11pt}
\edef\mainmagstep{\magstephalf}
\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
\font\texti=cmmi10 scaled \mainmagstep
\font\textsy=cmsy10 scaled \mainmagstep
\def\textecsize{1095}

% A few fonts for @defun names and args.
% \defsl is the ordinary slanted roman face (\slshape), so it takes the
% OT1 CMap like every other \slshape font here, not the typewriter one.
\setfont\defbf\bfshape{10}{\magstep1}{OT1}
\setfont\deftt\ttshape{10}{\magstep1}{OT1TT}
\setfont\defsl\slshape{10}{\magstep1}{OT1}
\setfont\defttsl\ttslshape{10}{\magstep1}{OT1TT}
\def\df{\let\ttfont=\deftt \let\bffont = \defbf
\let\ttslfont=\defttsl \let\slfont=\defsl \bf}

% Fonts for indices, footnotes, small examples (9pt).
\def\smallnominalsize{9pt}
\setfont\smallrm\rmshape{9}{1000}{OT1}
\setfont\smalltt\ttshape{9}{1000}{OT1TT}
\setfont\smallbf\bfshape{10}{900}{OT1}
\setfont\smallit\itshape{9}{1000}{OT1IT}
\setfont\smallsl\slshape{9}{1000}{OT1}
\setfont\smallsf\sfshape{9}{1000}{OT1}
\setfont\smallsc\scshape{10}{900}{OT1}
\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
\font\smalli=cmmi9
\font\smallsy=cmsy9
\def\smallecsize{0900}

% Fonts for small examples (8pt).
\def\smallernominalsize{8pt}
\setfont\smallerrm\rmshape{8}{1000}{OT1}
\setfont\smallertt\ttshape{8}{1000}{OT1TT}
\setfont\smallerbf\bfshape{10}{800}{OT1}
\setfont\smallerit\itshape{8}{1000}{OT1IT}
\setfont\smallersl\slshape{8}{1000}{OT1}
\setfont\smallersf\sfshape{8}{1000}{OT1}
\setfont\smallersc\scshape{10}{800}{OT1}
\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
\font\smalleri=cmmi8
\font\smallersy=cmsy8
\def\smallerecsize{0800}

% Fonts for title page (20.74pt: 12pt design size at \magstep3):
\def\titlenominalsize{20pt}
\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
\let\titlebf=\titlerm
\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
\font\titlei=cmmi12 scaled \magstep3
\font\titlesy=cmsy10 scaled \magstep4
\def\titleecsize{2074}

% Chapter (and unnumbered) fonts (17.28pt).
\def\chapnominalsize{17pt}
\setfont\chaprm\rmbshape{12}{\magstep2}{OT1}
\setfont\chapit\itbshape{10}{\magstep3}{OT1IT}
\setfont\chapsl\slbshape{10}{\magstep3}{OT1}
\setfont\chaptt\ttbshape{12}{\magstep2}{OT1TT}
\setfont\chapttsl\ttslshape{10}{\magstep3}{OT1TT}
\setfont\chapsf\sfbshape{17}{1000}{OT1}
\let\chapbf=\chaprm
\setfont\chapsc\scbshape{10}{\magstep3}{OT1}
\font\chapi=cmmi12 scaled \magstep2
\font\chapsy=cmsy10 scaled \magstep3
\def\chapecsize{1728}

% Section fonts (14.4pt).
\def\secnominalsize{14pt}
\setfont\secrm\rmbshape{12}{\magstep1}{OT1}
\setfont\secrmnotbold\rmshape{12}{\magstep1}{OT1}
\setfont\secit\itbshape{10}{\magstep2}{OT1IT}
\setfont\secsl\slbshape{10}{\magstep2}{OT1}
\setfont\sectt\ttbshape{12}{\magstep1}{OT1TT}
\setfont\secttsl\ttslshape{10}{\magstep2}{OT1TT}
\setfont\secsf\sfbshape{12}{\magstep1}{OT1}
\let\secbf\secrm
\setfont\secsc\scbshape{10}{\magstep2}{OT1}
\font\seci=cmmi12 scaled \magstep1
\font\secsy=cmsy10 scaled \magstep2
\def\sececsize{1440}

% Subsection fonts (13.15pt).
\def\ssecnominalsize{13pt}
\setfont\ssecrm\rmbshape{12}{\magstephalf}{OT1}
\setfont\ssecit\itbshape{10}{1315}{OT1IT}
\setfont\ssecsl\slbshape{10}{1315}{OT1}
\setfont\ssectt\ttbshape{12}{\magstephalf}{OT1TT}
\setfont\ssecttsl\ttslshape{10}{1315}{OT1TT}
\setfont\ssecsf\sfbshape{12}{\magstephalf}{OT1}
\let\ssecbf\ssecrm
\setfont\ssecsc\scbshape{10}{1315}{OT1}
\font\sseci=cmmi12 scaled \magstephalf
\font\ssecsy=cmsy10 scaled 1315
\def\ssececsize{1200}

% Reduced fonts for @acronym in text (10pt).
\def\reducednominalsize{10pt}
\setfont\reducedrm\rmshape{10}{1000}{OT1}
\setfont\reducedtt\ttshape{10}{1000}{OT1TT}
\setfont\reducedbf\bfshape{10}{1000}{OT1}
\setfont\reducedit\itshape{10}{1000}{OT1IT}
\setfont\reducedsl\slshape{10}{1000}{OT1}
\setfont\reducedsf\sfshape{10}{1000}{OT1}
\setfont\reducedsc\scshape{10}{1000}{OT1}
\setfont\reducedttsl\ttslshape{10}{1000}{OT1TT}
\font\reducedi=cmmi10
\font\reducedsy=cmsy10
\def\reducedecsize{1000}

\textleading = 13.2pt % line spacing for 11pt CM
\textfonts            % reset the current fonts
\rm
} % end of 11pt text font size definitions, \definetextfontsizexi


% Definitions to make the main text be 10pt Computer Modern, with
% section, chapter, etc., sizes following suit.  This is for the GNU
% Press printing of the Emacs 22 manual.  Maybe other manuals in the
% future.  Used with @smallbook, which sets the leading to 12pt.
%
% Loads every font used by the document at the sizes that go with a
% 10pt main text, halves \parskip, sets \textleading, and finally
% switches to the text fonts in roman.  The last argument of each
% \setfont names the ToUnicode CMap to attach: OT1 for roman-encoded
% faces, OT1IT for text italic, OT1TT for typewriter faces.
\def\definetextfontsizex{%
% Text fonts (10pt).
\def\textnominalsize{10pt}
\edef\mainmagstep{1000}
\setfont\textrm\rmshape{10}{\mainmagstep}{OT1}
\setfont\texttt\ttshape{10}{\mainmagstep}{OT1TT}
\setfont\textbf\bfshape{10}{\mainmagstep}{OT1}
\setfont\textit\itshape{10}{\mainmagstep}{OT1IT}
\setfont\textsl\slshape{10}{\mainmagstep}{OT1}
\setfont\textsf\sfshape{10}{\mainmagstep}{OT1}
\setfont\textsc\scshape{10}{\mainmagstep}{OT1}
\setfont\textttsl\ttslshape{10}{\mainmagstep}{OT1TT}
\font\texti=cmmi10 scaled \mainmagstep
\font\textsy=cmsy10 scaled \mainmagstep
\def\textecsize{1000}

% A few fonts for @defun names and args.
% \defsl is the ordinary slanted roman face (\slshape), so it takes the
% OT1 CMap like every other \slshape font here, not the typewriter one.
\setfont\defbf\bfshape{10}{\magstephalf}{OT1}
\setfont\deftt\ttshape{10}{\magstephalf}{OT1TT}
\setfont\defsl\slshape{10}{\magstephalf}{OT1}
\setfont\defttsl\ttslshape{10}{\magstephalf}{OT1TT}
\def\df{\let\ttfont=\deftt \let\bffont = \defbf
\let\slfont=\defsl \let\ttslfont=\defttsl \bf}

% Fonts for indices, footnotes, small examples (9pt).
\def\smallnominalsize{9pt}
\setfont\smallrm\rmshape{9}{1000}{OT1}
\setfont\smalltt\ttshape{9}{1000}{OT1TT}
\setfont\smallbf\bfshape{10}{900}{OT1}
\setfont\smallit\itshape{9}{1000}{OT1IT}
\setfont\smallsl\slshape{9}{1000}{OT1}
\setfont\smallsf\sfshape{9}{1000}{OT1}
\setfont\smallsc\scshape{10}{900}{OT1}
\setfont\smallttsl\ttslshape{10}{900}{OT1TT}
\font\smalli=cmmi9
\font\smallsy=cmsy9
\def\smallecsize{0900}

% Fonts for small examples (8pt).
\def\smallernominalsize{8pt}
\setfont\smallerrm\rmshape{8}{1000}{OT1}
\setfont\smallertt\ttshape{8}{1000}{OT1TT}
\setfont\smallerbf\bfshape{10}{800}{OT1}
\setfont\smallerit\itshape{8}{1000}{OT1IT}
\setfont\smallersl\slshape{8}{1000}{OT1}
\setfont\smallersf\sfshape{8}{1000}{OT1}
\setfont\smallersc\scshape{10}{800}{OT1}
\setfont\smallerttsl\ttslshape{10}{800}{OT1TT}
\font\smalleri=cmmi8
\font\smallersy=cmsy8
\def\smallerecsize{0800}

% Fonts for title page (20.74pt: 12pt design size at \magstep3):
\def\titlenominalsize{20pt}
\setfont\titlerm\rmbshape{12}{\magstep3}{OT1}
\setfont\titleit\itbshape{10}{\magstep4}{OT1IT}
\setfont\titlesl\slbshape{10}{\magstep4}{OT1}
\setfont\titlett\ttbshape{12}{\magstep3}{OT1TT}
\setfont\titlettsl\ttslshape{10}{\magstep4}{OT1TT}
\setfont\titlesf\sfbshape{17}{\magstep1}{OT1}
\let\titlebf=\titlerm
\setfont\titlesc\scbshape{10}{\magstep4}{OT1}
\font\titlei=cmmi12 scaled \magstep3
\font\titlesy=cmsy10 scaled \magstep4
\def\titleecsize{2074}

% Chapter fonts (14.4pt).
\def\chapnominalsize{14pt}
\setfont\chaprm\rmbshape{12}{\magstep1}{OT1}
\setfont\chapit\itbshape{10}{\magstep2}{OT1IT}
\setfont\chapsl\slbshape{10}{\magstep2}{OT1}
\setfont\chaptt\ttbshape{12}{\magstep1}{OT1TT}
\setfont\chapttsl\ttslshape{10}{\magstep2}{OT1TT}
\setfont\chapsf\sfbshape{12}{\magstep1}{OT1}
\let\chapbf\chaprm
\setfont\chapsc\scbshape{10}{\magstep2}{OT1}
\font\chapi=cmmi12 scaled \magstep1
\font\chapsy=cmsy10 scaled \magstep2
\def\chapecsize{1440}

% Section fonts (12pt).
\def\secnominalsize{12pt}
\setfont\secrm\rmbshape{12}{1000}{OT1}
\setfont\secit\itbshape{10}{\magstep1}{OT1IT}
\setfont\secsl\slbshape{10}{\magstep1}{OT1}
\setfont\sectt\ttbshape{12}{1000}{OT1TT}
\setfont\secttsl\ttslshape{10}{\magstep1}{OT1TT}
\setfont\secsf\sfbshape{12}{1000}{OT1}
\let\secbf\secrm
\setfont\secsc\scbshape{10}{\magstep1}{OT1}
\font\seci=cmmi12
\font\secsy=cmsy10 scaled \magstep1
\def\sececsize{1200}

% Subsection fonts (10pt).
\def\ssecnominalsize{10pt}
\setfont\ssecrm\rmbshape{10}{1000}{OT1}
\setfont\ssecit\itbshape{10}{1000}{OT1IT}
\setfont\ssecsl\slbshape{10}{1000}{OT1}
\setfont\ssectt\ttbshape{10}{1000}{OT1TT}
\setfont\ssecttsl\ttslshape{10}{1000}{OT1TT}
\setfont\ssecsf\sfbshape{10}{1000}{OT1}
\let\ssecbf\ssecrm
\setfont\ssecsc\scbshape{10}{1000}{OT1}
\font\sseci=cmmi10
\font\ssecsy=cmsy10
\def\ssececsize{1000}

% Reduced fonts for @acronym in text (9pt).
\def\reducednominalsize{9pt}
\setfont\reducedrm\rmshape{9}{1000}{OT1}
\setfont\reducedtt\ttshape{9}{1000}{OT1TT}
\setfont\reducedbf\bfshape{10}{900}{OT1}
\setfont\reducedit\itshape{9}{1000}{OT1IT}
\setfont\reducedsl\slshape{9}{1000}{OT1}
\setfont\reducedsf\sfshape{9}{1000}{OT1}
\setfont\reducedsc\scshape{10}{900}{OT1}
\setfont\reducedttsl\ttslshape{10}{900}{OT1TT}
\font\reducedi=cmmi9
\font\reducedsy=cmsy9
\def\reducedecsize{0900}

\divide\parskip by 2  % reduce space between paragraphs
\textleading = 12pt   % line spacing for 10pt CM
\textfonts            % reset the current fonts
\rm
} % end of 10pt text font size definitions, \definetextfontsizex

% Fonts for short table of contents.
\setfont\shortcontrm\rmshape{12}{1000}{OT1}
\setfont\shortcontbf\bfshape{10}{\magstep1}{OT1}  % no cmb12
\setfont\shortcontsl\slshape{12}{1000}{OT1}
\setfont\shortconttt\ttshape{12}{1000}{OT1TT}


% We provide the user-level command
%   @fonttextsize 10
% (or 11) to redefine the text font size.  pt is assumed.
%
\def\xiword{11}
\def\xword{10}
\def\xwordpt{10pt}
%
\parseargdef\fonttextsize{%
  \def\textsizearg{#1}%
  %\wlog{doing @fonttextsize \textsizearg}%
  %
  % Set \globaldefs so that documents can use this inside @tex, since
  % makeinfo 4.8 does not support it, but we need it nonetheless.
  %
  % The argument is compared against the two supported values; anything
  % else is reported as an error and leaves the fonts untouched.
  \begingroup \globaldefs=1
  \ifx\textsizearg\xword \definetextfontsizex
  \else \ifx\textsizearg\xiword \definetextfontsizexi
  \else
    \errhelp=\EMsimple
    \errmessage{@fonttextsize only supports `10' or `11', not `\textsizearg'}
  \fi\fi
  \endgroup
}

%
% Change the current font style to #1, remembering it in \curfontstyle.
% For now, we do not accumulate font styles: @b{@i{foo}} prints foo in
% italics, not bold italics.
%
\def\setfontstyle#1{%
  \def\curfontstyle{#1}% not as a control sequence, because we are \edef'd.
  \csname #1font\endcsname  % change the current font
}

% The style commands select the math family and then the font of that
% style in the current size (\rmfont, \itfont, ...).
\def\rm{\fam=0 \setfontstyle{rm}}
\def\it{\fam=\itfam \setfontstyle{it}}
\def\sl{\fam=\slfam \setfontstyle{sl}}
\def\bf{\fam=\bffam \setfontstyle{bf}}\def\bfstylename{bf}
\def\tt{\fam=\ttfam \setfontstyle{tt}}

% Texinfo sort of supports the sans serif font style, which plain TeX does not.
% So we set up a \sf.
\newfam\sffam
\def\sf{\fam=\sffam \setfontstyle{sf}}

% We don't need math for this font style.
\def\ttsl{\setfontstyle{ttsl}}


% In order for the font changes to affect most math symbols and letters,
% we have to define the \textfont of the standard families.  We don't
% bother to reset \scriptfont and \scriptscriptfont; awaiting user need.
%
\def\resetmathfonts{%
  \textfont0=\rmfont \textfont1=\ifont \textfont2=\syfont
  \textfont\itfam=\itfont \textfont\slfam=\slfont \textfont\bffam=\bffont
  \textfont\ttfam=\ttfont \textfont\sffam=\sffont
}

%

% The font-changing commands (all called \...fonts) redefine the meanings
% of \STYLEfont, instead of just \STYLE.  We do this because \STYLE needs
% to also set the current \fam for math mode.  Our \STYLE (e.g., \rm)
% commands hardwire \STYLEfont to set the current font.
%
% Each font-changing command also sets the names \lsize (one size lower)
% and \lllsize (three sizes lower).  These relative commands are used
% in, e.g., the LaTeX logo and acronyms.
+%
+% This all needs generalizing, badly.
+%
+
+% \assignfonts{PREFIX}: make the generic font names (\rmfont, \itfont,
+% \slfont, \bffont, \ttfont, \smallcaps, \sffont, \ifont, \syfont and
+% \ttslfont) aliases for \PREFIXrm, \PREFIXit, and so on.  PREFIX is the
+% name of a font set, such as `text' or `chap'.  A name that does not
+% exist becomes \relax, because \csname is used to build it.
+\def\assignfonts#1{%
+  \expandafter\let\expandafter\rmfont\csname #1rm\endcsname
+  \expandafter\let\expandafter\itfont\csname #1it\endcsname
+  \expandafter\let\expandafter\slfont\csname #1sl\endcsname
+  \expandafter\let\expandafter\bffont\csname #1bf\endcsname
+  \expandafter\let\expandafter\ttfont\csname #1tt\endcsname
+  \expandafter\let\expandafter\smallcaps\csname #1sc\endcsname
+  \expandafter\let\expandafter\sffont \csname #1sf\endcsname
+  \expandafter\let\expandafter\ifont \csname #1i\endcsname
+  \expandafter\let\expandafter\syfont \csname #1sy\endcsname
+  \expandafter\let\expandafter\ttslfont\csname #1ttsl\endcsname
+}
+
+% True while the current font set uses bold for its roman text; set by
+% the \...fonts commands created with \definefontsetatsize below.
+\newif\ifrmisbold
+
+% Select smaller font size with the current style. Used to change font size
+% in, e.g., the LaTeX logo and acronyms. If we are using bold fonts for
+% normal roman text, also use bold fonts for roman text in the smaller size.
+\def\switchtolllsize{%
+   % \lllsize is expanded first so that \assignfonts sees the set name.
+   \expandafter\assignfonts\expandafter{\lllsize}%
+   \ifrmisbold
+     \let\rmfont\bffont
+   \fi
+   % Re-run the current style command (\rm, \it, ...) to select the font.
+   \csname\curfontstyle\endcsname
+}%
+
+% Same as \switchtolllsize, but uses the font set named by \lsize.
+\def\switchtolsize{%
+   \expandafter\assignfonts\expandafter{\lsize}%
+   \ifrmisbold
+     \let\rmfont\bffont
+   \fi
+   \csname\curfontstyle\endcsname
+}%
+
+% \definefontsetatsize{NAME}{LSIZE}{LLLSIZE}{LEADING}{BOLD} defines the
+% command \NAMEfonts.  When called, \NAMEfonts
+%   - records NAME in \curfontsize,
+%   - records the font sets to use for \switchtolsize (LSIZE) and
+%     \switchtolllsize (LLLSIZE),
+%   - calls \rmisboldBOLD (BOLD is `true' or `false'),
+%   - assigns the generic fonts from set NAME and resets the math fonts,
+%   - passes LEADING to \setleading.
+\def\definefontsetatsize#1#2#3#4#5{%
+\expandafter\def\csname #1fonts\endcsname{%
+  \def\curfontsize{#1}%
+  \def\lsize{#2}\def\lllsize{#3}%
+  \csname rmisbold#5\endcsname
+  \assignfonts{#1}%
+  \resetmathfonts
+  \setleading{#4}%
+}}
+
+% NOTE(review): `title' and `sec' name `subsec' as a smaller font set,
+% but the subsection fonts visible in this file use the prefix `ssec'
+% (\ssecrm etc.).  If no \subsecrm... fonts exist, \assignfonts{subsec}
+% aliases the generic fonts to \relax and the size switch has no
+% effect there -- confirm whether `ssec' was intended.
+\definefontsetatsize{text} {reduced}{smaller}{\textleading}{false}
+\definefontsetatsize{title} {chap} {subsec} {27pt} {true}
+\definefontsetatsize{chap} {sec} {text} {19pt} {true}
+\definefontsetatsize{sec} {subsec} {reduced}{17pt} {true}
+\definefontsetatsize{ssec} {text} {small} {15pt} {true}
+\definefontsetatsize{reduced}{small} {smaller}{10.5pt}{false}
+\definefontsetatsize{small} {smaller}{smaller}{10.5pt}{false}
+\definefontsetatsize{smaller}{smaller}{smaller}{9.5pt} {false}
+
+% @titlefont{TEXT}: typeset TEXT in the roman font of the title font
+% set; the extra braces keep the font change local.
+\def\titlefont#1{{\titlefonts\rm #1}}
+% Let the subsection font set also be selected under the names
+% \subsecfonts and \subsubsecfonts.
+\let\subsecfonts = \ssecfonts
+\let\subsubsecfonts = \ssecfonts
+
+% Define these just so they can be easily changed for other fonts.
+\def\angleleft{$\langle$}
+\def\angleright{$\rangle$}
+
+% Set the fonts to use with the @small... environments.
+\let\smallexamplefonts = \smallfonts
+
+% About \smallexamplefonts. If we use \smallfonts (9pt), @smallexample
+% can fit this many characters:
+% 8.5x11=86 smallbook=72 a4=90 a5=69
+% If we use \scriptfonts (8pt), then we can fit this many characters:
+% 8.5x11=90+ smallbook=80 a4=90+ a5=77
+% For me, subjectively, the few extra characters that fit aren't worth
+% the additional smallness of 8pt. So I'm making the default 9pt.
+%
+% By the way, for comparison, here's what fits with @example (10pt):
+% 8.5x11=71 smallbook=60 a4=75 a5=58
+% --karl, 24jan03.
+
+% Set up the default fonts, so we can use them for creating boxes.
+%
+\definetextfontsizexi
+
+
+\message{markup,}
+
+% Check if we are currently using a typewriter font. Since all the
+% Computer Modern typewriter fonts have zero interword stretch (and
+% shrink), and it is reasonable to expect all typewriter fonts to have
+% this property, we can check that font parameter.
+%
+% Use as \ifmonospace ... \else ... \fi.  The space after `0pt' ends
+% the dimension, so that following text is not scanned as part of it.
+\def\ifmonospace{\ifdim\fontdimen3\font=0pt }
+
+% Markup style infrastructure. \defmarkupstylesetup\INITMACRO will
+% define and register \INITMACRO to be called on markup style changes.
+% \INITMACRO can check \currentmarkupstyle for the innermost
+% style and the set of \ifmarkupSTYLE switches for all styles
+% currently in effect.
+\newif\ifmarkupvar
+\newif\ifmarkupsamp
+\newif\ifmarkupkey
+%\newif\ifmarkupfile % @file == @samp.
+%\newif\ifmarkupoption % @option == @samp.
+\newif\ifmarkupcode
+\newif\ifmarkupkbd
+%\newif\ifmarkupenv % @env == @code.
+%\newif\ifmarkupcommand % @command == @code.
+\newif\ifmarkuptex % @tex (and part of @math, for now).
+\newif\ifmarkupexample
+\newif\ifmarkupverb
+\newif\ifmarkupverbatim
+
+% Name of the innermost markup style; empty when none is in effect.
+\let\currentmarkupstyle\empty
+
+% \setupmarkupstyle{STYLE}: turn on \ifmarkupSTYLE, record STYLE as the
+% current style, and run every registered setup handler.
+\def\setupmarkupstyle#1{%
+  \csname markup#1true\endcsname
+  \def\currentmarkupstyle{#1}%
+  \markupstylesetup
+}
+
+% List of registered handlers; \defmarkupstylesetup appends to it.
+\let\markupstylesetup\empty
+
+% \defmarkupstylesetup\HANDLER{BODY}: append \HANDLER to
+% \markupstylesetup and define \HANDLER as BODY.  The final `\def#1' is
+% deliberately left unfinished: it takes the brace group that follows
+% the call as the body of \HANDLER.
+\def\defmarkupstylesetup#1{%
+  \expandafter\def\expandafter\markupstylesetup
+    \expandafter{\markupstylesetup #1}%
+  \def#1%
+}
+
+% Markup style setup for left and right quotes.
+% Each handler looks for a style-specific macro named
+% \markupsetuplqSTYLE (or \markupsetuprqSTYLE); if that name is
+% undefined (\relax), the default handler is used instead.
+\defmarkupstylesetup\markupsetuplq{%
+  \expandafter\let\expandafter \temp
+    \csname markupsetuplq\currentmarkupstyle\endcsname
+  \ifx\temp\relax \markupsetuplqdefault \else \temp \fi
+}
+
+\defmarkupstylesetup\markupsetuprq{%
+  \expandafter\let\expandafter \temp
+    \csname markupsetuprq\currentmarkupstyle\endcsname
+  \ifx\temp\relax \markupsetuprqdefault \else \temp \fi
+}
+
+% Inside this group ' and ` are active characters, so the \let
+% assignments in the macros below refer to the active characters.
+{
+\catcode`\'=\active
+\catcode`\`=\active
+
+\gdef\markupsetuplqdefault{\let`\lq}
+\gdef\markupsetuprqdefault{\let'\rq}
+
+\gdef\markupsetcodequoteleft{\let`\codequoteleft}
+\gdef\markupsetcodequoteright{\let'\codequoteright}
+}
+
+% The code-like styles all use the code quote handlers.
+\let\markupsetuplqcode \markupsetcodequoteleft
+\let\markupsetuprqcode \markupsetcodequoteright
+%
+\let\markupsetuplqexample \markupsetcodequoteleft
+\let\markupsetuprqexample \markupsetcodequoteright
+%
+\let\markupsetuplqkbd \markupsetcodequoteleft
+\let\markupsetuprqkbd \markupsetcodequoteright
+%
+\let\markupsetuplqsamp \markupsetcodequoteleft
+\let\markupsetuprqsamp \markupsetcodequoteright
+%
+\let\markupsetuplqverb \markupsetcodequoteleft
+\let\markupsetuprqverb \markupsetcodequoteright
+%
+\let\markupsetuplqverbatim \markupsetcodequoteleft
+\let\markupsetuprqverbatim \markupsetcodequoteright
+
+% Allow an option to not use regular directed right quote/apostrophe
+% (char 0x27), but instead the undirected quote from cmtt (char 0x0d).
+% The undirected quote is ugly, so don't make it the default, but it
+% works for pasting with more pdf viewers (at least evince), the
+% lilypond developers report.
xpdf does work with the regular 0x27. +% +\def\codequoteright{% + \expandafter\ifx\csname SETtxicodequoteundirected\endcsname\relax + \expandafter\ifx\csname SETcodequoteundirected\endcsname\relax + '% + \else \char'15 \fi + \else \char'15 \fi +} +% +% and a similar option for the left quote char vs. a grave accent. +% Modern fonts display ASCII 0x60 as a grave accent, so some people like +% the code environments to do likewise. +% +\def\codequoteleft{% + \expandafter\ifx\csname SETtxicodequotebacktick\endcsname\relax + \expandafter\ifx\csname SETcodequotebacktick\endcsname\relax + % [Knuth] pp. 380,381,391 + % \relax disables Spanish ligatures ?` and !` of \tt font. + \relax`% + \else \char'22 \fi + \else \char'22 \fi +} + +% Commands to set the quote options. +% +\parseargdef\codequoteundirected{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxicodequoteundirected\endcsname + = t% + \else\ifx\temp\offword + \expandafter\let\csname SETtxicodequoteundirected\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @codequoteundirected value `\temp', must be on|off}% + \fi\fi +} +% +\parseargdef\codequotebacktick{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETtxicodequotebacktick\endcsname + = t% + \else\ifx\temp\offword + \expandafter\let\csname SETtxicodequotebacktick\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @codequotebacktick value `\temp', must be on|off}% + \fi\fi +} + +% [Knuth] pp. 380,381,391, disable Spanish ligatures ?` and !` of \tt font. +\def\noligaturesquoteleft{\relax\lq} + +% Count depth in font-changes, for error checks +\newcount\fontdepth \fontdepth=0 + +% Font commands. + +% #1 is the font command (\sl or \it), #2 is the text to slant. +% If we are in a monospaced environment, however, 1) always use \ttsl, +% and 2) do not add an italic correction. 
+\def\dosmartslant#1#2{%
+  % \next is set to the action to perform and then executed, so that
+  % the \futurelet in the second branch looks at the token following
+  % the whole command.
+  \ifusingtt
+    {{\ttsl #2}\let\next=\relax}%
+    {\def\next{{#1#2}\futurelet\next\smartitaliccorrection}}%
+  \next
+}
+\def\smartslanted{\dosmartslant\sl}
+\def\smartitalic{\dosmartslant\it}
+
+% Output an italic correction unless \next (presumed to be the following
+% character) is such as not to need one.
+% No correction is added before a comma, hyphen, period, \. or \comma.
+% \aftersmartic is run last in every case; it is empty unless a caller
+% (see \var) has redefined it.
+\def\smartitaliccorrection{%
+  \ifx\next,%
+  \else\ifx\next-%
+  \else\ifx\next.%
+  \else\ifx\next\.%
+  \else\ifx\next\comma%
+  \else\ptexslash
+  \fi\fi\fi\fi\fi
+  \aftersmartic
+}
+
+% Unconditionally use \ttsl, and no italic correction. @var is set to
+% this for defuns.
+\def\ttslanted#1{{\ttsl #1}}
+
+% @cite is like \smartslanted except unconditionally use \sl. We never want
+% ttsl for book titles, do we?
+\def\cite#1{{\sl #1}\futurelet\next\smartitaliccorrection}
+
+\def\aftersmartic{}
+% @var: slanted text.  \aftersmartic is redefined for the duration of
+% this one call so that a \null follows the italic correction; the
+% redefinition then restores the saved meaning of \aftersmartic.
+\def\var#1{%
+  \let\saveaftersmartic = \aftersmartic
+  \def\aftersmartic{\null\let\aftersmartic=\saveaftersmartic}%
+  \smartslanted{#1}%
+}
+
+\let\i=\smartitalic
+\let\slanted=\smartslanted
+\let\dfn=\smartslanted
+\let\emph=\smartitalic
+
+% Explicit font changes: @r, @sc, undocumented @ii.
+\def\r#1{{\rm #1}} % roman font
+\def\sc#1{{\smallcaps#1}} % smallcaps font
+\def\ii#1{{\it #1}} % italic font
+
+% @b, explicit bold. Also @strong.
+\def\b#1{{\bf #1}}
+\let\strong=\b
+
+% @sansserif, explicit sans.
+\def\sansserif#1{{\sf #1}}
+
+% We can't just use \exhyphenpenalty, because that only has effect at
+% the end of a paragraph. Restore normal hyphenation at the end of the
+% group within which \nohyphenation is presumably called.
+%
+\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation}
+\def\restorehyphenation{\hyphenchar\font = `- }
+
+% Set sfcode to normal for the chars that usually have another value.
+% Can't use plain's \frenchspacing because it uses the `\x notation, and
+% sometimes \x has an active definition that messes things up.
+% +\catcode`@=11 + \def\plainfrenchspacing{% + \sfcode`\.=\@m \sfcode`\?=\@m \sfcode`\!=\@m + \sfcode`\:=\@m \sfcode`\;=\@m \sfcode`\,=\@m + \def\endofsentencespacefactor{1000}% for @. and friends + } + \def\plainnonfrenchspacing{% + \sfcode`\.3000\sfcode`\?3000\sfcode`\!3000 + \sfcode`\:2000\sfcode`\;1500\sfcode`\,1250 + \def\endofsentencespacefactor{3000}% for @. and friends + } +\catcode`@=\other +\def\endofsentencespacefactor{3000}% default + +% @t, explicit typewriter. +\def\t#1{% + {\tt \rawbackslash \plainfrenchspacing #1}% + \null +} + +% @samp. +\def\samp#1{{\setupmarkupstyle{samp}\lq\tclose{#1}\rq\null}} + +% @indicateurl is \samp, that is, with quotes. +\let\indicateurl=\samp + +% @code (and similar) prints in typewriter, but with spaces the same +% size as normal in the surrounding text, without hyphenation, etc. +% This is a subroutine for that. +\def\tclose#1{% + {% + % Change normal interword space to be same as for the current font. + \spaceskip = \fontdimen2\font + % + % Switch to typewriter. + \tt + % + % But `\ ' produces the large typewriter interword space. + \def\ {{\spaceskip = 0pt{} }}% + % + % Turn off hyphenation. + \nohyphenation + % + \rawbackslash + \plainfrenchspacing + #1% + }% + \null % reset spacefactor to 1000 +} + +% We *must* turn on hyphenation at `-' and `_' in @code. +% (But see \codedashfinish below.) +% Otherwise, it is too hard to avoid overfull hboxes +% in the Emacs manual, the Library manual, etc. +% +% Unfortunately, TeX uses one parameter (\hyphenchar) to control +% both hyphenation at - and hyphenation within words. +% We must therefore turn them both off (\tclose does that) +% and arrange explicitly to hyphenate at a dash. -- rms. +{ + \catcode`\-=\active \catcode`\_=\active + \catcode`\'=\active \catcode`\`=\active + \global\let'=\rq \global\let`=\lq % default definitions + % + \global\def\code{\begingroup + \setupmarkupstyle{code}% + % The following should really be moved into \setupmarkupstyle handlers. 
+ \catcode\dashChar=\active \catcode\underChar=\active + \ifallowcodebreaks + \let-\codedash + \let_\codeunder + \else + \let-\normaldash + \let_\realunder + \fi + % Given -foo (with a single dash), we do not want to allow a break + % after the hyphen. + \global\let\codedashprev=\codedash + % + \codex + } + % + \gdef\codedash{\futurelet\next\codedashfinish} + \gdef\codedashfinish{% + \normaldash % always output the dash character itself. + % + % Now, output a discretionary to allow a line break, unless + % (a) the next character is a -, or + % (b) the preceding character is a -. + % E.g., given --posix, we do not want to allow a break after either -. + % Given --foo-bar, we do want to allow a break between the - and the b. + \ifx\next\codedash \else + \ifx\codedashprev\codedash + \else \discretionary{}{}{}\fi + \fi + % we need the space after the = for the case when \next itself is a + % space token; it would get swallowed otherwise. As in @code{- a}. + \global\let\codedashprev= \next + } +} +\def\normaldash{-} +% +\def\codex #1{\tclose{#1}\endgroup} + +\def\codeunder{% + % this is all so @math{@code{var_name}+1} can work. In math mode, _ + % is "active" (mathcode"8000) and \normalunderscore (or \char95, etc.) + % will therefore expand the active definition of _, which is us + % (inside @code that is), therefore an endless loop. + \ifusingtt{\ifmmode + \mathchar"075F % class 0=ordinary, family 7=ttfam, pos 0x5F=_. + \else\normalunderscore \fi + \discretionary{}{}{}}% + {\_}% +} + +% An additional complication: the above will allow breaks after, e.g., +% each of the four underscores in __typeof__. This is bad. +% @allowcodebreaks provides a document-level way to turn breaking at - +% and _ on and off. 
+% +\newif\ifallowcodebreaks \allowcodebreakstrue + +\def\keywordtrue{true} +\def\keywordfalse{false} + +\parseargdef\allowcodebreaks{% + \def\txiarg{#1}% + \ifx\txiarg\keywordtrue + \allowcodebreakstrue + \else\ifx\txiarg\keywordfalse + \allowcodebreaksfalse + \else + \errhelp = \EMsimple + \errmessage{Unknown @allowcodebreaks option `\txiarg', must be true|false}% + \fi\fi +} + +% For @command, @env, @file, @option quotes seem unnecessary, +% so use \code rather than \samp. +\let\command=\code +\let\env=\code +\let\file=\code +\let\option=\code + +% @uref (abbreviation for `urlref') aka @url takes an optional +% (comma-separated) second argument specifying the text to display and +% an optional third arg as text to display instead of (rather than in +% addition to) the url itself. First (mandatory) arg is the url. + +% TeX-only option to allow changing PDF output to show only the second +% arg (if given), and not the url (which is then just the link target). +\newif\ifurefurlonlylink + +% The main macro is \urefbreak, which allows breaking at expected +% places within the url. (There used to be another version, which +% didn't support automatic breaking.) +\def\urefbreak{\begingroup \urefcatcodes \dourefbreak} +\let\uref=\urefbreak +% +\def\dourefbreak#1{\urefbreakfinish #1,,,\finish} +\def\urefbreakfinish#1,#2,#3,#4\finish{% doesn't work in @example + \unsepspaces + \pdfurl{#1}% + \setbox0 = \hbox{\ignorespaces #3}% + \ifdim\wd0 > 0pt + \unhbox0 % third arg given, show only that + \else + \setbox0 = \hbox{\ignorespaces #2}% look for second arg + \ifdim\wd0 > 0pt + \ifpdf + % For pdfTeX and LuaTeX + \ifurefurlonlylink + % PDF plus option to not display url, show just arg + \unhbox0 + \else + % PDF, normally display both arg and url for consistency, + % visibility, if the pdf is eventually used to print, etc. 
+ \unhbox0\ (\urefcode{#1})% + \fi + \else + \ifx\XeTeXrevision\thisisundefined + \unhbox0\ (\urefcode{#1})% DVI, always show arg and url + \else + % For XeTeX + \ifurefurlonlylink + % PDF plus option to not display url, show just arg + \unhbox0 + \else + % PDF, normally display both arg and url for consistency, + % visibility, if the pdf is eventually used to print, etc. + \unhbox0\ (\urefcode{#1})% + \fi + \fi + \fi + \else + \urefcode{#1}% only url given, so show it + \fi + \fi + \endlink +\endgroup} + +% Allow line breaks around only a few characters (only). +\def\urefcatcodes{% + \catcode`\&=\active \catcode`\.=\active + \catcode`\#=\active \catcode`\?=\active + \catcode`\/=\active +} +{ + \urefcatcodes + % + \global\def\urefcode{\begingroup + \setupmarkupstyle{code}% + \urefcatcodes + \let&\urefcodeamp + \let.\urefcodedot + \let#\urefcodehash + \let?\urefcodequest + \let/\urefcodeslash + \codex + } + % + % By default, they are just regular characters. + \global\def&{\normalamp} + \global\def.{\normaldot} + \global\def#{\normalhash} + \global\def?{\normalquest} + \global\def/{\normalslash} +} + +% we put a little stretch before and after the breakable chars, to help +% line breaking of long url's. The unequal skips make look better in +% cmtt at least, especially for dots. 
+% Amount of stretch inserted before (pre) and after (post) each
+% breakable url character.  The two amounts are intentionally unequal.
+\def\urefprestretchamount{.13em}
+\def\urefpoststretchamount{.1em}
+% Each stretch macro first emits the break control chosen by
+% @urefbreakstyle (\nobreak or \allowbreak), then the stretchable glue.
+\def\urefprestretch{\urefprebreak \hskip0pt plus\urefprestretchamount\relax}
+% The trailing glue uses the post amount.  (It used to reuse the pre
+% amount, which left \urefpoststretchamount without any effect.)
+\def\urefpoststretch{\urefpostbreak \hskip0pt plus\urefpoststretchamount\relax}
+%
+% Replacement texts for the characters that are breakable inside a url:
+% the character itself, surrounded by the pre and post stretch.
+\def\urefcodeamp{\urefprestretch \&\urefpoststretch}
+\def\urefcodedot{\urefprestretch .\urefpoststretch}
+\def\urefcodehash{\urefprestretch \#\urefpoststretch}
+\def\urefcodequest{\urefprestretch ?\urefpoststretch}
+% A slash needs to look at the following token first.
+\def\urefcodeslash{\futurelet\next\urefcodeslashfinish}
+{
+  % / is made active here so that the \ifx below compares \next with
+  % the active slash.
+  \catcode`\/=\active
+  \global\def\urefcodeslashfinish{%
+    \urefprestretch \slashChar
+    % Allow line break only after the final / in a sequence of
+    % slashes, to avoid line break between the slashes in http://.
+    \ifx\next/\else \urefpoststretch \fi
+  }
+}
+
+% One more complication: by default we'll break after the special
+% characters, but some people like to break before the special chars, so
+% allow that. Also allow no breaking at all, for manual control.
+%
+% @urefbreakstyle none|before|after
+% Defines \urefprebreak and \urefpostbreak accordingly; any other
+% argument is reported as an error.
+\parseargdef\urefbreakstyle{%
+  \def\txiarg{#1}%
+  \ifx\txiarg\wordnone
+    \def\urefprebreak{\nobreak}\def\urefpostbreak{\nobreak}%
+  \else\ifx\txiarg\wordbefore
+    \def\urefprebreak{\allowbreak}\def\urefpostbreak{\nobreak}%
+  \else\ifx\txiarg\wordafter
+    \def\urefprebreak{\nobreak}\def\urefpostbreak{\allowbreak}%
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @urefbreakstyle setting `\txiarg'}%
+  \fi\fi\fi
+}
+\def\wordafter{after}
+\def\wordbefore{before}
+\def\wordnone{none}
+
+% Default: break after the special characters.
+\urefbreakstyle after
+
+% @url synonym for @uref, since that's how everyone uses it.
+%
+\let\url=\uref
+
+% rms does not like angle brackets --karl, 17may97.
+% So now @email is just like @uref, unless we are pdf.
+%
+%\def\email#1{\angleleft{\tt #1}\angleright}
+% @email{ADDRESS[, TEXT]}.  In the two branches that define \doemail,
+% a mailto: link to ADDRESS is opened with \pdfurl and closed with
+% \endlink; TEXT is shown if it is non-empty (has non-zero width),
+% otherwise ADDRESS is shown via \code.
+\ifpdf
+  \def\email#1{\doemail#1,,\finish}
+  \def\doemail#1,#2,#3\finish{\begingroup
+    \unsepspaces
+    \pdfurl{mailto:#1}%
+    \setbox0 = \hbox{\ignorespaces #2}%
+    \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi
+    \endlink
+  \endgroup}
+\else
+  \ifx\XeTeXrevision\thisisundefined
+    % Neither \ifpdf nor XeTeX: @email is simply @uref.
+    \let\email=\uref
+  \else
+    % XeTeX: same definitions as in the \ifpdf branch above.
+    \def\email#1{\doemail#1,,\finish}
+    \def\doemail#1,#2,#3\finish{\begingroup
+      \unsepspaces
+      \pdfurl{mailto:#1}%
+      \setbox0 = \hbox{\ignorespaces #2}%
+      \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi
+      \endlink
+    \endgroup}
+  \fi
+\fi
+
+% @kbdinputstyle -- arg is `distinct' (@kbd uses slanted tty font always),
+% `example' (@kbd uses ttsl only inside of @example and friends),
+% or `code' (@kbd uses normal tty font always).
+% The choice is stored globally in \kbdfont and \kbdexamplefont.
+\parseargdef\kbdinputstyle{%
+  \def\txiarg{#1}%
+  \ifx\txiarg\worddistinct
+    \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\ttsl}%
+  \else\ifx\txiarg\wordexample
+    \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\tt}%
+  \else\ifx\txiarg\wordcode
+    \gdef\kbdexamplefont{\tt}\gdef\kbdfont{\tt}%
+  \else
+    \errhelp = \EMsimple
+    \errmessage{Unknown @kbdinputstyle setting `\txiarg'}%
+  \fi\fi\fi
+}
+\def\worddistinct{distinct}
+\def\wordexample{example}
+\def\wordcode{code}
+
+% Default is `distinct'.
+\kbdinputstyle distinct
+
+% @kbd is like @code, except that if the argument is just one @key command,
+% then @kbd has no effect.
+% The argument is saved in \look and handed to \kbdsub followed by the
+% sentinel `??', with \par as the delimiter.
+\def\kbd#1{{\def\look{#1}\expandafter\kbdsub\look??\par}}
+
+\def\xkey{\key}
+% #1 = first token of the argument, #2 = second token or group,
+% #3 = the rest including the sentinel.  If #1 is \key and #3 is exactly
+% the sentinel, the argument was a single @key{...}, so just typeset
+% that; otherwise typeset the saved argument in the kbd font.
+\def\kbdsub#1#2#3\par{%
+  \def\one{#1}\def\three{#3}\def\threex{??}%
+  \ifx\one\xkey\ifx\threex\three \key{#2}%
+  \else{\tclose{\kbdfont\setupmarkupstyle{kbd}\look}}\fi
+  \else{\tclose{\kbdfont\setupmarkupstyle{kbd}\look}}\fi
+}
+
+% definition of @key that produces a lozenge. Doesn't adjust to text size.
+%\setfont\keyrm\rmshape{8}{1000}{OT1} +%\font\keysy=cmsy9 +%\def\key#1{{\keyrm\textfont2=\keysy \leavevmode\hbox{% +% \raise0.4pt\hbox{\angleleft}\kern-.08em\vtop{% +% \vbox{\hrule\kern-0.4pt +% \hbox{\raise0.4pt\hbox{\vphantom{\angleleft}}#1}}% +% \kern-0.4pt\hrule}% +% \kern-.06em\raise0.4pt\hbox{\angleright}}}} + +% definition of @key with no lozenge. If the current font is already +% monospace, don't change it; that way, we respect @kbdinputstyle. But +% if it isn't monospace, then use \tt. +% +\def\key#1{{\setupmarkupstyle{key}% + \nohyphenation + \ifmonospace\else\tt\fi + #1}\null} + +% @clicksequence{File @click{} Open ...} +\def\clicksequence#1{\begingroup #1\endgroup} + +% @clickstyle @arrow (by default) +\parseargdef\clickstyle{\def\click{#1}} +\def\click{\arrow} + +% Typeset a dimension, e.g., `in' or `pt'. The only reason for the +% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt. +% +\def\dmn#1{\thinspace #1} + +% @acronym for "FBI", "NATO", and the like. +% We print this one point size smaller, since it's intended for +% all-uppercase. +% +\def\acronym#1{\doacronym #1,,\finish} +\def\doacronym#1,#2,#3\finish{% + {\switchtolsize #1}% + \def\temp{#2}% + \ifx\temp\empty \else + \space ({\unsepspaces \ignorespaces \temp \unskip})% + \fi + \null % reset \spacefactor=1000 +} + +% @abbr for "Comput. J." and the like. +% No font change, but don't do end-of-sentence spacing. +% +\def\abbr#1{\doabbr #1,,\finish} +\def\doabbr#1,#2,#3\finish{% + {\plainfrenchspacing #1}% + \def\temp{#2}% + \ifx\temp\empty \else + \space ({\unsepspaces \ignorespaces \temp \unskip})% + \fi + \null % reset \spacefactor=1000 +} + +% @asis just yields its argument. Used with @table, for example. +% +\def\asis#1{#1} + +% @math outputs its argument in math mode. +% +% One complication: _ usually means subscripts, but it could also mean +% an actual _ character, as in @math{@var{some_variable} + 1}. 
So make +% _ active, and distinguish by seeing if the current family is \slfam, +% which is what @var uses. +{ + \catcode`\_ = \active + \gdef\mathunderscore{% + \catcode`\_=\active + \def_{\ifnum\fam=\slfam \_\else\sb\fi}% + } +} +% Another complication: we want \\ (and @\) to output a math (or tt) \. +% FYI, plain.tex uses \\ as a temporary control sequence (for no +% particular reason), but this is not advertised and we don't care. +% +% The \mathchar is class=0=ordinary, family=7=ttfam, position=5C=\. +\def\mathbackslash{\ifnum\fam=\ttfam \mathchar"075C \else\backslash \fi} +% +\def\math{% + \ifmmode\else % only go into math if not in math mode already + \tex + \mathunderscore + \let\\ = \mathbackslash + \mathactive + % make the texinfo accent commands work in math mode + \let\"=\ddot + \let\'=\acute + \let\==\bar + \let\^=\hat + \let\`=\grave + \let\u=\breve + \let\v=\check + \let\~=\tilde + \let\dotaccent=\dot + % have to provide another name for sup operator + \let\mathopsup=\sup + $\expandafter\finishmath\fi +} +\def\finishmath#1{#1$\endgroup} % Close the group opened by \tex. + +% Some active characters (such as <) are spaced differently in math. +% We have to reset their definitions in case the @math was an argument +% to a command which sets the catcodes (such as @item or @section). +% +{ + \catcode`^ = \active + \catcode`< = \active + \catcode`> = \active + \catcode`+ = \active + \catcode`' = \active + \gdef\mathactive{% + \let^ = \ptexhat + \let< = \ptexless + \let> = \ptexgtr + \let+ = \ptexplus + \let' = \ptexquoteright + } +} + +% for @sub and @sup, if in math mode, just do a normal sub/superscript. +% If in text, use math to place as sub/superscript, but switch +% into text mode, with smaller fonts. This is a different font than the +% one used for real math sub/superscripts (8pt vs. 7pt), but let's not +% fix it (significant additions to font machinery) until someone notices. 
+%
+% \sub and \sup choose at expansion time: in math mode a plain
+% sub/superscript, in text mode \finishsub/\finishsup, which take the
+% argument and set it in an \hbox at the lllsize font.
+\def\sub{\ifmmode \expandafter\sb \else \expandafter\finishsub\fi}
+\def\finishsub#1{$\sb{\hbox{\switchtolllsize #1}}$}%
+%
+\def\sup{\ifmmode \expandafter\ptexsp \else \expandafter\finishsup\fi}
+\def\finishsup#1{$\ptexsp{\hbox{\switchtolllsize #1}}$}%
+
+% @inlinefmt{FMTNAME,PROCESSED-TEXT} and @inlineraw{FMTNAME,RAW-TEXT}.
+% Ignore unless FMTNAME == tex; then it is like @iftex and @tex,
+% except specified as a normal braced arg, so no newlines to worry about.
+%
+\def\outfmtnametex{tex}
+%
+% The helper macros split the braced argument at its commas; the
+% trailing `,\finish' (or `,,,\finish') supplied by the caller ends the
+% last field.
+\long\def\inlinefmt#1{\doinlinefmt #1,\finish}
+\long\def\doinlinefmt#1,#2,\finish{%
+  \def\inlinefmtname{#1}%
+  \ifx\inlinefmtname\outfmtnametex \ignorespaces #2\fi
+}
+%
+% @inlinefmtifelse{FMTNAME,THEN-TEXT,ELSE-TEXT} expands THEN-TEXT if
+% FMTNAME is tex, else ELSE-TEXT.
+\long\def\inlinefmtifelse#1{\doinlinefmtifelse #1,,,\finish}
+\long\def\doinlinefmtifelse#1,#2,#3,#4,\finish{%
+  \def\inlinefmtname{#1}%
+  \ifx\inlinefmtname\outfmtnametex \ignorespaces #2\else \ignorespaces #3\fi
+}
+%
+% For raw, must switch into @tex before parsing the argument, to avoid
+% setting catcodes prematurely. Doing it this way means that, for
+% example, @inlineraw{html, foo{bar} gets a parse error instead of being
+% ignored. But this isn't important because if people want a literal
+% *right* brace they would have to use a command anyway, so they may as
+% well use a command to get a left brace too. We could re-use the
+% delimiter character idea from \verb, but it seems like overkill.
+%
+\long\def\inlineraw{\tex \doinlineraw}
+\long\def\doinlineraw#1{\doinlinerawtwo #1,\finish}
+% \long, like \doinlineraw and the other inline helpers: otherwise a
+% paragraph break inside RAW-TEXT is accepted by \doinlineraw and then
+% rejected here as a runaway argument.
+\long\def\doinlinerawtwo#1,#2,\finish{%
+  \def\inlinerawname{#1}%
+  \ifx\inlinerawname\outfmtnametex \ignorespaces #2\fi
+  \endgroup % close group opened by \tex.
+}
+
+% @inlineifset{VAR, TEXT} expands TEXT if VAR is @set.
+%
+% VAR is treated as set when the control sequence \SETVAR is not
+% \relax; \csname turns an undefined name into \relax.
+\long\def\inlineifset#1{\doinlineifset #1,\finish}
+\long\def\doinlineifset#1,#2,\finish{%
+  \def\inlinevarname{#1}%
+  \expandafter\ifx\csname SET\inlinevarname\endcsname\relax
+  \else\ignorespaces#2\fi
+}
+
+% @inlineifclear{VAR, TEXT} expands TEXT if VAR is not @set.
+%
+\long\def\inlineifclear#1{\doinlineifclear #1,\finish}
+\long\def\doinlineifclear#1,#2,\finish{%
+  \def\inlinevarname{#1}%
+  \expandafter\ifx\csname SET\inlinevarname\endcsname\relax \ignorespaces#2\fi
+}
+
+
+\message{glyphs,}
+% and logos.
+
+% @@ prints an @, as does @atchar{}.
+\def\@{\char64 }
+\let\atchar=\@
+
+% @{ @} @lbracechar{} @rbracechar{} all generate brace characters.
+% In a monospace font the brace is taken from the current font by
+% character code; otherwise the math brace is used.
+\def\lbracechar{{\ifmonospace\char123\else\ensuremath\lbrace\fi}}
+\def\rbracechar{{\ifmonospace\char125\else\ensuremath\rbrace\fi}}
+\let\{=\lbracechar
+\let\}=\rbracechar
+
+% @comma{} to avoid , parsing problems.
+\let\comma = ,
+
+% Accents: @, @dotaccent @ringaccent @ubaraccent @udotaccent
+% Others are defined by plain TeX: @` @' @" @^ @~ @= @u @v @H.
+\let\, = \ptexc
+\let\dotaccent = \ptexdot
+\def\ringaccent#1{{\accent23 #1}}
+\let\tieaccent = \ptext
+\let\ubaraccent = \ptexb
+\let\udotaccent = \d
+
+% Other special characters: @questiondown @exclamdown @ordf @ordm
+% Plain TeX defines: @AA @AE @O @OE @L (plus lowercase versions) @ss.
+\def\questiondown{?`}
+\def\exclamdown{!`}
+% @ordf and @ordm: an underlined a or o, set at the lllsize font and
+% raised by 1ex.
+\def\ordf{\leavevmode\raise1ex\hbox{\switchtolllsize \underbar{a}}}
+\def\ordm{\leavevmode\raise1ex\hbox{\switchtolllsize \underbar{o}}}
+
+% Dotless i and dotless j, used for accents.
+% The argument is compared with \imacro and \jmacro; the math or text
+% form is chosen according to the current mode, and any other argument
+% is an error.
+\def\imacro{i}
+\def\jmacro{j}
+\def\dotless#1{%
+  \def\temp{#1}%
+  \ifx\temp\imacro \ifmmode\imath \else\ptexi \fi
+  \else\ifx\temp\jmacro \ifmmode\jmath \else\j \fi
+  \else \errmessage{@dotless can be used only with i or j}%
+  \fi\fi
+}
+
+% The \TeX{} logo, as in plain, but resetting the spacing so that a
+% period following counts as ending a sentence. (Idea found in latex.)
+%
+% \edef expands the existing \TeX once, here, so the new definition is
+% not recursive.
+\edef\TeX{\TeX \spacefactor=1000 }
+
+% @LaTeX{} logo.
Not quite the same results as the definition in +% latex.ltx, since we use a different font for the raised A; it's most +% convenient for us to use an explicitly smaller font, rather than using +% the \scriptstyle font (since we don't reset \scriptstyle and +% \scriptscriptstyle). +% +\def\LaTeX{% + L\kern-.36em + {\setbox0=\hbox{T}% + \vbox to \ht0{\hbox{% + \ifx\textnominalsize\xwordpt + % for 10pt running text, lllsize (8pt) is too small for the A in LaTeX. + % Revert to plain's \scriptsize, which is 7pt. + \count255=\the\fam $\fam\count255 \scriptstyle A$% + \else + % For 11pt, we can use our lllsize. + \switchtolllsize A% + \fi + }% + \vss + }}% + \kern-.15em + \TeX +} + +% Some math mode symbols. Define \ensuremath to switch into math mode +% unless we are already there. Expansion tricks may not be needed here, +% but safer, and can't hurt. +\def\ensuremath{\ifmmode \expandafter\asis \else\expandafter\ensuredmath \fi} +\def\ensuredmath#1{$\relax#1$} +% +\def\bullet{\ensuremath\ptexbullet} +\def\geq{\ensuremath\ge} +\def\leq{\ensuremath\le} +\def\minus{\ensuremath-} + +% @dots{} outputs an ellipsis using the current font. +% We do .5em per period so that it has the same spacing in the cm +% typewriter fonts as three actual period characters; on the other hand, +% in other typewriter fonts three periods are wider than 1.5em. So do +% whichever is larger. +% +\def\dots{% + \leavevmode + \setbox0=\hbox{...}% get width of three periods + \ifdim\wd0 > 1.5em + \dimen0 = \wd0 + \else + \dimen0 = 1.5em + \fi + \hbox to \dimen0{% + \hskip 0pt plus.25fil + .\hskip 0pt plus1fil + .\hskip 0pt plus1fil + .\hskip 0pt plus.5fil + }% +} + +% @enddots{} is an end-of-sentence ellipsis. +% +\def\enddots{% + \dots + \spacefactor=\endofsentencespacefactor +} + +% @point{}, @result{}, @expansion{}, @print{}, @equiv{}. +% +% Since these characters are used in examples, they should be an even number of +% \tt widths. Each \tt character is 1en, so two makes it 1em. 
+%
+% Apart from \point, each symbol is centered in a box exactly 1em wide.
+\def\point{$\star$}
+\def\arrow{\leavevmode\raise.05ex\hbox to 1em{\hfil$\rightarrow$\hfil}}
+\def\result{\leavevmode\raise.05ex\hbox to 1em{\hfil$\Rightarrow$\hfil}}
+\def\expansion{\leavevmode\hbox to 1em{\hfil$\mapsto$\hfil}}
+\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}}
+\def\equiv{\leavevmode\hbox to 1em{\hfil$\ptexequiv$\hfil}}
+
+% The @error{} command.
+% Adapted from the TeXbook's \boxit.
+%
+% The box is built once, here, and stored in \errorbox; \error below
+% only copies it.
+\newbox\errorbox
+%
+{\ttfont \global\dimen0 = 3em}% Width of the box.
+\dimen2 = .55pt % Thickness of rules
+% The text. (`r' is open on the right, `e' somewhat less so on the left.)
+\setbox0 = \hbox{\kern-.75pt \reducedsf \putworderror\kern-1.5pt}
+%
+\setbox\errorbox=\hbox to \dimen0{\hfil
+  \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right.
+  \advance\hsize by -2\dimen2 % Rules.
+  \vbox{%
+    \hrule height\dimen2
+    \hbox{\vrule width\dimen2 \kern3pt % Space to left of text.
+      \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below.
+      \kern3pt\vrule width\dimen2}% Space to right.
+    \hrule height\dimen2}
+  \hfil}
+%
+% Typeset a copy of the prebuilt box, lowered by .7ex.
+\def\error{\leavevmode\lower.7ex\copy\errorbox}
+
+% @pounds{} is a sterling sign, which Knuth put in the CM italic font.
+%
+\def\pounds{{\it\$}}
+
+% @euro{} comes from a separate font, depending on the current style.
+% We use the free feym* fonts from the eurosym package by Henrik
+% Theiling, which support regular, slanted, bold and bold slanted (and
+% "outlined" (blackboard bold, sort of) versions, which we don't need).
+% It is available from http://www.ctan.org/tex-archive/fonts/eurosym.
+%
+% Although only regular is the truly official Euro symbol, we ignore
+% that. The Euro is designed to be slightly taller than the regular
+% font height.
+%
+% feymr - regular
+% feymo - slanted
+% feybr - bold
+% feybo - bold slanted
+%
+% There is no good (free) typewriter version, to my knowledge.
+% A feymr10 euro is ~7.3pt wide, while a normal cmtt10 char is ~5.25pt wide.
+% Hmm.
+%
+% Also doesn't work in math.
Do we need to do math with euro symbols? +% Hope not. +% +% +\def\euro{{\eurofont e}} +\def\eurofont{% + % We set the font at each command, rather than predefining it in + % \textfonts and the other font-switching commands, so that + % installations which never need the symbol don't have to have the + % font installed. + % + % There is only one designed size (nominal 10pt), so we always scale + % that to the current nominal size. + % + % By the way, simply using "at 1em" works for cmr10 and the like, but + % does not work for cmbx10 and other extended/shrunken fonts. + % + \def\eurosize{\csname\curfontsize nominalsize\endcsname}% + % + \ifx\curfontstyle\bfstylename + % bold: + \font\thiseurofont = \ifusingit{feybo10}{feybr10} at \eurosize + \else + % regular: + \font\thiseurofont = \ifusingit{feymo10}{feymr10} at \eurosize + \fi + \thiseurofont +} + +% Glyphs from the EC fonts. We don't use \let for the aliases, because +% sometimes we redefine the original macro, and the alias should reflect +% the redefinition. +% +% Use LaTeX names for the Icelandic letters. +\def\DH{{\ecfont \char"D0}} % Eth +\def\dh{{\ecfont \char"F0}} % eth +\def\TH{{\ecfont \char"DE}} % Thorn +\def\th{{\ecfont \char"FE}} % thorn +% +\def\guillemetleft{{\ecfont \char"13}} +\def\guillemotleft{\guillemetleft} +\def\guillemetright{{\ecfont \char"14}} +\def\guillemotright{\guillemetright} +\def\guilsinglleft{{\ecfont \char"0E}} +\def\guilsinglright{{\ecfont \char"0F}} +\def\quotedblbase{{\ecfont \char"12}} +\def\quotesinglbase{{\ecfont \char"0D}} +% +% This positioning is not perfect (see the ogonek LaTeX package), but +% we have the precomposed glyphs for the most common cases. We put the +% tests to use those glyphs in the single \ogonek macro so we have fewer +% dummy definitions to worry about for index entries, etc. +% +% ogonek is also used with other letters in Lithuanian (IOU), but using +% the precomposed glyphs for those is not so easy since they aren't in +% the same EC font. 
+\def\ogonek#1{{% + \def\temp{#1}% + \ifx\temp\macrocharA\Aogonek + \else\ifx\temp\macrochara\aogonek + \else\ifx\temp\macrocharE\Eogonek + \else\ifx\temp\macrochare\eogonek + \else + \ecfont \setbox0=\hbox{#1}% + \ifdim\ht0=1ex\accent"0C #1% + \else\ooalign{\unhbox0\crcr\hidewidth\char"0C \hidewidth}% + \fi + \fi\fi\fi\fi + }% +} +\def\Aogonek{{\ecfont \char"81}}\def\macrocharA{A} +\def\aogonek{{\ecfont \char"A1}}\def\macrochara{a} +\def\Eogonek{{\ecfont \char"86}}\def\macrocharE{E} +\def\eogonek{{\ecfont \char"A6}}\def\macrochare{e} +% +% Use the European Computer Modern fonts (cm-super in outline format) +% for non-CM glyphs. That is ec* for regular text and tc* for the text +% companion symbols (LaTeX TS1 encoding). Both are part of the ec +% package and follow the same conventions. +% +\def\ecfont{\etcfont{e}} +\def\tcfont{\etcfont{t}} +% +\def\etcfont#1{% + % We can't distinguish serif/sans and italic/slanted, but this + % is used for crude hacks anyway (like adding French and German + % quotes to documents typeset with CM, where we lose kerning), so + % hopefully nobody will notice/care. + \edef\ecsize{\csname\curfontsize ecsize\endcsname}% + \edef\nominalsize{\csname\curfontsize nominalsize\endcsname}% + \ifmonospace + % typewriter: + \font\thisecfont = #1ctt\ecsize \space at \nominalsize + \else + \ifx\curfontstyle\bfstylename + % bold: + \font\thisecfont = #1cb\ifusingit{i}{x}\ecsize \space at \nominalsize + \else + % regular: + \font\thisecfont = #1c\ifusingit{ti}{rm}\ecsize \space at \nominalsize + \fi + \fi + \thisecfont +} + +% @registeredsymbol - R in a circle. The font for the R should really +% be smaller yet, but lllsize is the best we can do for now. +% Adapted from the plain.tex definition of \copyright. +% +\def\registeredsymbol{% + $^{{\ooalign{\hfil\raise.07ex\hbox{\switchtolllsize R}% + \hfil\crcr\Orb}}% + }$% +} + +% @textdegree - the normal degrees sign. 
+% +\def\textdegree{$^\circ$} + +% Laurent Siebenmann reports \Orb undefined with: +% Textures 1.7.7 (preloaded format=plain 93.10.14) (68K) 16 APR 2004 02:38 +% so we'll define it if necessary. +% +\ifx\Orb\thisisundefined +\def\Orb{\mathhexbox20D} +\fi + +% Quotes. +\chardef\quotedblleft="5C +\chardef\quotedblright=`\" +\chardef\quoteleft=`\` +\chardef\quoteright=`\' + + +\message{page headings,} + +\newskip\titlepagetopglue \titlepagetopglue = 1.5in +\newskip\titlepagebottomglue \titlepagebottomglue = 2pc + +% First the title page. Must do @settitle before @titlepage. +\newif\ifseenauthor +\newif\iffinishedtitlepage + +% @setcontentsaftertitlepage used to do an implicit @contents or +% @shortcontents after @end titlepage, but it is now obsolete. +\def\setcontentsaftertitlepage{% + \errmessage{@setcontentsaftertitlepage has been removed as a Texinfo + command; move your @contents command if you want the contents + after the title page.}}% +\def\setshortcontentsaftertitlepage{% + \errmessage{@setshortcontentsaftertitlepage has been removed as a Texinfo + command; move your @shortcontents and @contents commands if you + want the contents after the title page.}}% + +\parseargdef\shorttitlepage{% + \begingroup \hbox{}\vskip 1.5in \chaprm \centerline{#1}% + \endgroup\page\hbox{}\page} + +\envdef\titlepage{% + % Open one extra group, as we want to close it in the middle of \Etitlepage. + \begingroup + \parindent=0pt \textfonts + % Leave some space at the very top of the page. + \vglue\titlepagetopglue + % No rule at page bottom unless we print one at the top with @title. + \finishedtitlepagetrue + % + % Most title ``pages'' are actually two pages long, with space + % at the top of the second. We don't want the ragged left on the second. 
+ \let\oldpage = \page + \def\page{% + \iffinishedtitlepage\else + \finishtitlepage + \fi + \let\page = \oldpage + \page + \null + }% +} + +\def\Etitlepage{% + \iffinishedtitlepage\else + \finishtitlepage + \fi + % It is important to do the page break before ending the group, + % because the headline and footline are only empty inside the group. + % If we use the new definition of \page, we always get a blank page + % after the title page, which we certainly don't want. + \oldpage + \endgroup + % + % Need this before the \...aftertitlepage checks so that if they are + % in effect the toc pages will come out with page numbers. + \HEADINGSon +} + +\def\finishtitlepage{% + \vskip4pt \hrule height 2pt width \hsize + \vskip\titlepagebottomglue + \finishedtitlepagetrue +} + +% Settings used for typesetting titles: no hyphenation, no indentation, +% don't worry much about spacing, ragged right. This should be used +% inside a \vbox, and fonts need to be set appropriately first. \par should +% be specified before the end of the \vbox, since a vbox is a group. +% +\def\raggedtitlesettings{% + \rm + \hyphenpenalty=10000 + \parindent=0pt + \tolerance=5000 + \ptexraggedright +} + +% Macros to be used within @titlepage: + +\let\subtitlerm=\rmfont +\def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines} + +\parseargdef\title{% + \checkenv\titlepage + \vbox{\titlefonts \raggedtitlesettings #1\par}% + % print a rule at the page bottom also. + \finishedtitlepagefalse + \vskip4pt \hrule height 4pt width \hsize \vskip4pt +} + +\parseargdef\subtitle{% + \checkenv\titlepage + {\subtitlefont \rightline{#1}}% +} + +% @author should come last, but may come many times. +% It can also be used inside @quotation. +% +\parseargdef\author{% + \def\temp{\quotation}% + \ifx\thisenv\temp + \def\quotationauthor{#1}% printed in \Equotation. 
+ \else + \checkenv\titlepage + \ifseenauthor\else \vskip 0pt plus 1filll \seenauthortrue \fi + {\secfonts\rm \leftline{#1}}% + \fi +} + + +% Set up page headings and footings. + +\let\thispage=\folio + +\newtoks\evenheadline % headline on even pages +\newtoks\oddheadline % headline on odd pages +\newtoks\evenfootline % footline on even pages +\newtoks\oddfootline % footline on odd pages + +% Now make \makeheadline and \makefootline in Plain TeX use those variables +\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline + \else \the\evenheadline \fi}} +\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline + \else \the\evenfootline \fi}\HEADINGShook} +\let\HEADINGShook=\relax + +% Commands to set those variables. +% For example, this is what @headings on does +% @evenheading @thistitle|@thispage|@thischapter +% @oddheading @thischapter|@thispage|@thistitle +% @evenfooting @thisfile|| +% @oddfooting ||@thisfile + + +\def\evenheading{\parsearg\evenheadingxxx} +\def\evenheadingxxx #1{\evenheadingyyy #1\|\|\|\|\finish} +\def\evenheadingyyy #1\|#2\|#3\|#4\finish{% +\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\def\oddheading{\parsearg\oddheadingxxx} +\def\oddheadingxxx #1{\oddheadingyyy #1\|\|\|\|\finish} +\def\oddheadingyyy #1\|#2\|#3\|#4\finish{% +\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\parseargdef\everyheading{\oddheadingxxx{#1}\evenheadingxxx{#1}}% + +\def\evenfooting{\parsearg\evenfootingxxx} +\def\evenfootingxxx #1{\evenfootingyyy #1\|\|\|\|\finish} +\def\evenfootingyyy #1\|#2\|#3\|#4\finish{% +\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} + +\def\oddfooting{\parsearg\oddfootingxxx} +\def\oddfootingxxx #1{\oddfootingyyy #1\|\|\|\|\finish} +\def\oddfootingyyy #1\|#2\|#3\|#4\finish{% + \global\oddfootline = {\rlap{\centerline{#2}}\line{#1\hfil#3}}% + % + % Leave some space for the footline. Hopefully ok to assume + % @evenfooting will not be used by itself. 
+ \global\advance\txipageheight by -12pt + \global\advance\vsize by -12pt +} + +\parseargdef\everyfooting{\oddfootingxxx{#1}\evenfootingxxx{#1}} + +% @evenheadingmarks top \thischapter <- chapter at the top of a page +% @evenheadingmarks bottom \thischapter <- chapter at the bottom of a page +% +% The same set of arguments for: +% +% @oddheadingmarks +% @evenfootingmarks +% @oddfootingmarks +% @everyheadingmarks +% @everyfootingmarks + +% These define \getoddheadingmarks, \getevenheadingmarks, +% \getoddfootingmarks, and \getevenfootingmarks, each to one of +% \gettopheadingmarks, \getbottomheadingmarks. +% +\def\evenheadingmarks{\headingmarks{even}{heading}} +\def\oddheadingmarks{\headingmarks{odd}{heading}} +\def\evenfootingmarks{\headingmarks{even}{footing}} +\def\oddfootingmarks{\headingmarks{odd}{footing}} +\parseargdef\everyheadingmarks{\headingmarks{even}{heading}{#1} + \headingmarks{odd}{heading}{#1} } +\parseargdef\everyfootingmarks{\headingmarks{even}{footing}{#1} + \headingmarks{odd}{footing}{#1} } +% #1 = even/odd, #2 = heading/footing, #3 = top/bottom. +\def\headingmarks#1#2#3 {% + \expandafter\let\expandafter\temp \csname get#3headingmarks\endcsname + \global\expandafter\let\csname get#1#2marks\endcsname \temp +} + +\everyheadingmarks bottom +\everyfootingmarks bottom + +% @headings double turns headings on for double-sided printing. +% @headings single turns headings on for single-sided printing. +% @headings off turns them off. +% @headings on same as @headings double, retained for compatibility. +% @headings after turns on double-sided headings after this page. +% @headings doubleafter turns on double-sided headings after this page. +% @headings singleafter turns on single-sided headings after this page. +% By default, they are off at the start of a document, +% and turned `on' after @end titlepage. 
+ +\parseargdef\headings{\csname HEADINGS#1\endcsname} + +\def\headingsoff{% non-global headings elimination + \evenheadline={\hfil}\evenfootline={\hfil}% + \oddheadline={\hfil}\oddfootline={\hfil}% +} + +\def\HEADINGSoff{{\globaldefs=1 \headingsoff}} % global setting +\HEADINGSoff % it's the default + +% When we turn headings on, set the page number to 1. +% For double-sided printing, leave the footlines empty and put the +% chapter name on inside top of right hand pages, document +% title on inside top of left hand pages, and page numbers on outside top +% edge of all pages. +\def\HEADINGSdouble{% +\global\pageno=1 +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\folio\hfil\thistitle}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chapoddpage +} +\let\contentsalignmacro = \chappager + +% For single-sided printing, chapter title goes across top left of page, +% page number on top right. +\def\HEADINGSsingle{% +\global\pageno=1 +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\thischapterheading\hfil\folio}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chappager +} +\def\HEADINGSon{\HEADINGSdouble} + +\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} +\let\HEADINGSdoubleafter=\HEADINGSafter +\def\HEADINGSdoublex{% +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\folio\hfil\thistitle}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chapoddpage +} + +\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} +\def\HEADINGSsinglex{% +\global\evenfootline={\hfil} +\global\oddfootline={\hfil} +\global\evenheadline={\line{\thischapterheading\hfil\folio}} +\global\oddheadline={\line{\thischapterheading\hfil\folio}} +\global\let\contentsalignmacro = \chappager +} + +% Subroutines used in 
generating headings +% This produces Day Month Year style of output. +% Only define if not already defined, in case a txi-??.tex file has set +% up a different format (e.g., txi-cs.tex does this). +\ifx\today\thisisundefined +\def\today{% + \number\day\space + \ifcase\month + \or\putwordMJan\or\putwordMFeb\or\putwordMMar\or\putwordMApr + \or\putwordMMay\or\putwordMJun\or\putwordMJul\or\putwordMAug + \or\putwordMSep\or\putwordMOct\or\putwordMNov\or\putwordMDec + \fi + \space\number\year} +\fi + +% @settitle line... specifies the title of the document, for headings. +% It generates no output of its own. +\def\thistitle{\putwordNoTitle} +\def\settitle{\parsearg{\gdef\thistitle}} + + +\message{tables,} +% Tables -- @table, @ftable, @vtable, @item(x). + +% default indentation of table text +\newdimen\tableindent \tableindent=.8in +% default indentation of @itemize and @enumerate text +\newdimen\itemindent \itemindent=.3in +% margin between end of table item and start of table text. +\newdimen\itemmargin \itemmargin=.1in + +% used internally for \itemindent minus \itemmargin +\newdimen\itemmax + +% Note @table, @ftable, and @vtable define @item, @itemx, etc., with +% these defs. +% They also define \itemindex +% to index the item name in whatever manner is desired (perhaps none). + +\newif\ifitemxneedsnegativevskip + +\def\itemxpar{\par\ifitemxneedsnegativevskip\nobreak\vskip-\parskip\nobreak\fi} + +\def\internalBitem{\smallbreak \parsearg\itemzzz} +\def\internalBitemx{\itemxpar \parsearg\itemzzz} + +\def\itemzzz #1{\begingroup % + \advance\hsize by -\rightskip + \advance\hsize by -\tableindent + \setbox0=\hbox{\itemindicate{#1}}% + \itemindex{#1}% + \nobreak % This prevents a break before @itemx. + % + % If the item text does not fit in the space we have, put it on a line + % by itself, and do not allow a page break either before or after that + % line. 
We do not start a paragraph here because then if the next + % command is, e.g., @kindex, the whatsit would get put into the + % horizontal list on a line by itself, resulting in extra blank space. + \ifdim \wd0>\itemmax + % + % Make this a paragraph so we get the \parskip glue and wrapping, + % but leave it ragged-right. + \begingroup + \advance\leftskip by-\tableindent + \advance\hsize by\tableindent + \advance\rightskip by0pt plus1fil\relax + \leavevmode\unhbox0\par + \endgroup + % + % We're going to be starting a paragraph, but we don't want the + % \parskip glue -- logically it's part of the @item we just started. + \nobreak \vskip-\parskip + % + % Stop a page break at the \parskip glue coming up. However, if + % what follows is an environment such as @example, there will be no + % \parskip glue; then the negative vskip we just inserted would + % cause the example and the item to crash together. So we use this + % bizarre value of 10001 as a signal to \aboveenvbreak to insert + % \parskip glue after all. Section titles are handled this way also. + % + \penalty 10001 + \endgroup + \itemxneedsnegativevskipfalse + \else + % The item text fits into the space. Start a paragraph, so that the + % following text (if any) will end up on the same line. + \noindent + % Do this with kerns and \unhbox so that if there is a footnote in + % the item text, it can migrate to the main vertical list and + % eventually be printed. + \nobreak\kern-\tableindent + \dimen0 = \itemmax \advance\dimen0 by \itemmargin \advance\dimen0 by -\wd0 + \unhbox0 + \nobreak\kern\dimen0 + \endgroup + \itemxneedsnegativevskiptrue + \fi +} + +\def\item{\errmessage{@item while not in a list environment}} +\def\itemx{\errmessage{@itemx while not in a list environment}} + +% @table, @ftable, @vtable. 
+\envdef\table{% + \let\itemindex\gobble + \tablecheck{table}% +} +\envdef\ftable{% + \def\itemindex ##1{\doind {fn}{\code{##1}}}% + \tablecheck{ftable}% +} +\envdef\vtable{% + \def\itemindex ##1{\doind {vr}{\code{##1}}}% + \tablecheck{vtable}% +} +\def\tablecheck#1{% + \ifnum \the\catcode`\^^M=\active + \endgroup + \errmessage{This command won't work in this context; perhaps the problem is + that we are \inenvironment\thisenv}% + \def\next{\doignore{#1}}% + \else + \let\next\tablex + \fi + \next +} +\def\tablex#1{% + \def\itemindicate{#1}% + \parsearg\tabley +} +\def\tabley#1{% + {% + \makevalueexpandable + \edef\temp{\noexpand\tablez #1\space\space\space}% + \expandafter + }\temp \endtablez +} +\def\tablez #1 #2 #3 #4\endtablez{% + \aboveenvbreak + \ifnum 0#1>0 \advance \leftskip by #1\mil \fi + \ifnum 0#2>0 \tableindent=#2\mil \fi + \ifnum 0#3>0 \advance \rightskip by #3\mil \fi + \itemmax=\tableindent + \advance \itemmax by -\itemmargin + \advance \leftskip by \tableindent + \exdentamount=\tableindent + \parindent = 0pt + \parskip = \smallskipamount + \ifdim \parskip=0pt \parskip=2pt \fi + \let\item = \internalBitem + \let\itemx = \internalBitemx +} +\def\Etable{\endgraf\afterenvbreak} +\let\Eftable\Etable +\let\Evtable\Etable +\let\Eitemize\Etable +\let\Eenumerate\Etable + +% This is the counter used by @enumerate, which is really @itemize + +\newcount \itemno + +\envdef\itemize{\parsearg\doitemize} + +\def\doitemize#1{% + \aboveenvbreak + \itemmax=\itemindent + \advance\itemmax by -\itemmargin + \advance\leftskip by \itemindent + \exdentamount=\itemindent + \parindent=0pt + \parskip=\smallskipamount + \ifdim\parskip=0pt \parskip=2pt \fi + % + % Try typesetting the item mark so that if the document erroneously says + % something like @itemize @samp (intending @table), there's an error + % right away at the @itemize. It's not the best error message in the + % world, but it's better than leaving it to the @item. 
This means if + % the user wants an empty mark, they have to say @w{} not just @w. + \def\itemcontents{#1}% + \setbox0 = \hbox{\itemcontents}% + % + % @itemize with no arg is equivalent to @itemize @bullet. + \ifx\itemcontents\empty\def\itemcontents{\bullet}\fi + % + \let\item=\itemizeitem +} + +% Definition of @item while inside @itemize and @enumerate. +% +\def\itemizeitem{% + \advance\itemno by 1 % for enumerations + {\let\par=\endgraf \smallbreak}% reasonable place to break + {% + % If the document has an @itemize directly after a section title, a + % \nobreak will be last on the list, and \sectionheading will have + % done a \vskip-\parskip. In that case, we don't want to zero + % parskip, or the item text will crash with the heading. On the + % other hand, when there is normal text preceding the item (as there + % usually is), we do want to zero parskip, or there would be too much + % space. In that case, we won't have a \nobreak before. At least + % that's the theory. + \ifnum\lastpenalty<10000 \parskip=0in \fi + \noindent + \hbox to 0pt{\hss \itemcontents \kern\itemmargin}% + % + \ifinner\else + \vadjust{\penalty 1200}% not good to break after first line of item. + \fi + % We can be in inner vertical mode in a footnote, although an + % @itemize looks awful there. + }% + \flushcr +} + +% \splitoff TOKENS\endmark defines \first to be the first token in +% TOKENS, and \rest to be the remainder. +% +\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% + +% Allow an optional argument of an uppercase letter, lowercase letter, +% or number, to specify the first label in the enumerated list. No +% argument is the same as `1'. +% +\envparseargdef\enumerate{\enumeratey #1 \endenumeratey} +\def\enumeratey #1 #2\endenumeratey{% + % If we were given no argument, pretend we were given `1'. + \def\thearg{#1}% + \ifx\thearg\empty \def\thearg{1}\fi + % + % Detect if the argument is a single token. If so, it might be a + % letter. 
Otherwise, the only valid thing it can be is a number. + % (We will always have one token, because of the test we just made. + % This is a good thing, since \splitoff doesn't work given nothing at + % all -- the first parameter is undelimited.) + \expandafter\splitoff\thearg\endmark + \ifx\rest\empty + % Only one token in the argument. It could still be anything. + % A ``lowercase letter'' is one whose \lccode is nonzero. + % An ``uppercase letter'' is one whose \lccode is both nonzero, and + % not equal to itself. + % Otherwise, we assume it's a number. + % + % We need the \relax at the end of the \ifnum lines to stop TeX from + % continuing to look for a <number>. + % + \ifnum\lccode\expandafter`\thearg=0\relax + \numericenumerate % a number (we hope) + \else + % It's a letter. + \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax + \lowercaseenumerate % lowercase letter + \else + \uppercaseenumerate % uppercase letter + \fi + \fi + \else + % Multiple tokens in the argument. We hope it's a number. + \numericenumerate + \fi +} + +% An @enumerate whose labels are integers. The starting integer is +% given in \thearg. +% +\def\numericenumerate{% + \itemno = \thearg + \startenumeration{\the\itemno}% +} + +% The starting (lowercase) letter is in \thearg. +\def\lowercaseenumerate{% + \itemno = \expandafter`\thearg + \startenumeration{% + % Be sure we're not beyond the end of the alphabet. + % NOTE(review): \itemno is set from a character code above, so it is + % unclear how it could ever be 0 here -- confirm this test can fire. + \ifnum\itemno=0 + \errmessage{No more lowercase letters in @enumerate; get a bigger + alphabet}% + \fi + \char\lccode\itemno + }% +} + +% The starting (uppercase) letter is in \thearg. +\def\uppercaseenumerate{% + \itemno = \expandafter`\thearg + \startenumeration{% + % Be sure we're not beyond the end of the alphabet. + % NOTE(review): \itemno is set from a character code above, so it is + % unclear how it could ever be 0 here -- confirm this test can fire. + \ifnum\itemno=0 + \errmessage{No more uppercase letters in @enumerate; get a bigger + alphabet} + \fi + \char\uccode\itemno + }% +} + +% Call \doitemize, adding a period to the label given as the argument, and +% follow it with \flushcr. 
Also subtract one from the initial value in +% \itemno, since @item increments \itemno. +% +\def\startenumeration#1{% + \advance\itemno by -1 + \doitemize{#1.}\flushcr +} + +% @alphaenumerate and @capsenumerate are abbreviations for giving an arg +% to @enumerate. +% +\def\alphaenumerate{\enumerate{a}} +\def\capsenumerate{\enumerate{A}} +\def\Ealphaenumerate{\Eenumerate} +\def\Ecapsenumerate{\Eenumerate} + + +% @multitable macros +% Amy Hendrickson, 8/18/94, 3/6/96 +% +% @multitable ... @end multitable will make as many columns as desired. +% Contents of each column will wrap at width given in preamble. Width +% can be specified either with sample text given in a template line, +% or in percent of \hsize, the current width of text on page. + +% Table can continue over pages but will only break between lines. + +% To make preamble: +% +% Either define widths of columns in terms of percent of \hsize: +% @multitable @columnfractions .25 .3 .45 +% @item ... +% +% Numbers following @columnfractions are the fractions of the total +% current hsize to be used for each column. You may use as many +% columns as desired. + + +% Or use a template: +% @multitable {Column 1 template} {Column 2 template} {Column 3 template} +% @item ... +% using the widest term desired in each column. + +% Each new table line starts with @item, each subsequent new column +% starts with @tab. Empty columns may be produced by supplying @tab's +% with nothing between them for as many times as empty columns are needed, +% i.e., @tab@tab@tab will produce two empty columns. + +% @item, @tab do not need to be on their own lines, but it will not hurt +% if they are. + +% Sample multitable: + +% @multitable {Column 1 template} {Column 2 template} {Column 3 template} +% @item first col stuff @tab second col stuff @tab third col +% @item +% first col stuff +% @tab +% second col stuff +% @tab +% third col +% @item first col stuff @tab second col stuff +% @tab Many paragraphs of text may be used in any column. 
+% +% They will wrap at the width determined by the template. +% @item@tab@tab This will be in third column. +% @end multitable + +% Default dimensions may be reset by user. +% @multitableparskip is vertical space between paragraphs in table. +% @multitableparindent is paragraph indent in table. +% @multitablecolspace is horizontal space to be left between columns. +% @multitablelinespace is space to leave between table items, baseline +% to baseline. +% 0pt means it depends on current normal line spacing. +% +\newskip\multitableparskip +\newskip\multitableparindent +\newdimen\multitablecolspace +\newskip\multitablelinespace +\multitableparskip=0pt +\multitableparindent=6pt +\multitablecolspace=12pt +\multitablelinespace=0pt + +% Macros used to set up halign preamble: +% +\let\endsetuptable\relax +\def\xendsetuptable{\endsetuptable} +\let\columnfractions\relax +\def\xcolumnfractions{\columnfractions} +\newif\ifsetpercent + +% #1 is the @columnfraction, usually a decimal number like .5, but might +% be just 1. We just use it, whatever it is. +% +\def\pickupwholefraction#1 {% + \global\advance\colcount by 1 + \expandafter\xdef\csname col\the\colcount\endcsname{#1\hsize}% + \setuptable +} + +\newcount\colcount +\def\setuptable#1{% + \def\firstarg{#1}% + \ifx\firstarg\xendsetuptable + \let\go = \relax + \else + \ifx\firstarg\xcolumnfractions + \global\setpercenttrue + \else + \ifsetpercent + \let\go\pickupwholefraction + \else + \global\advance\colcount by 1 + \setbox0=\hbox{#1\unskip\space}% Add a normal word space as a + % separator; typically that is always in the input, anyway. + \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% + \fi + \fi + \ifx\go\pickupwholefraction + % Put the argument back for the \pickupwholefraction call, so + % we'll always have a period there to be parsed. + \def\go{\pickupwholefraction#1}% + \else + \let\go = \setuptable + \fi% + \fi + \go +} + +% multitable-only commands. 
+% +% @headitem starts a heading row, which we typeset in bold. Assignments +% have to be global since we are inside the implicit group of an +% alignment entry. \everycr below resets \everytab so we don't have to +% undo it ourselves. +\def\headitemfont{\b}% for people to use in the template row; not changeable +\def\headitem{% + \checkenv\multitable + \crcr + \gdef\headitemcrhook{\nobreak}% attempt to avoid page break after headings + \global\everytab={\bf}% can't use \headitemfont since the parsing differs + \the\everytab % for the first item +}% +% +% default for tables with no headings. +\let\headitemcrhook=\relax +% +% A \tab used to include \hskip1sp. But then the space in a template +% line is not enough. That is bad. So let's go back to just `&' until +% we again encounter the problem the 1sp was intended to solve. +% --karl, nathan@acm.org, 20apr99. +\def\tab{\checkenv\multitable &\the\everytab}% + +% @multitable ... @end multitable definitions: +% +\newtoks\everytab % insert after every tab. +% +\envdef\multitable{% + \vskip\parskip + \startsavinginserts + % + % @item within a multitable starts a normal row. + % We use \def instead of \let so that if one of the multitable entries + % contains an @itemize, we don't choke on the \item (seen as \crcr aka + % \endtemplate) expanding \doitemize. + \def\item{\crcr}% + % + \tolerance=9500 + \hbadness=9500 + \setmultitablespacing + \parskip=\multitableparskip + \parindent=\multitableparindent + \overfullrule=0pt + \global\colcount=0 + % + \everycr = {% + \noalign{% + \global\everytab={}% Reset from possible headitem. + \global\colcount=0 % Reset the column counter. 
+ % + % Check for saved footnotes, etc.: + \checkinserts + % + % Perhaps a \nobreak, then reset: + \headitemcrhook + \global\let\headitemcrhook=\relax + }% + }% + % + \parsearg\domultitable +} +\def\domultitable#1{% + % To parse everything between @multitable and @item: + \setuptable#1 \endsetuptable + % + % This preamble sets up a generic column definition, which will + % be used as many times as user calls for columns. + % \vtop will set a single line and will also let text wrap and + % continue for many paragraphs if desired. + \halign\bgroup &% + \global\advance\colcount by 1 + \multistrut + \vtop{% + % Use the current \colcount to find the correct column width: + \hsize=\expandafter\csname col\the\colcount\endcsname + % + % In order to keep entries from bumping into each other + % we will add a \leftskip of \multitablecolspace to all columns after + % the first one. + % + % If a template has been used, we will add \multitablecolspace + % to the width of each template entry. + % + % If the user has set preamble in terms of percent of \hsize we will + % use that dimension as the width of the column, and the \leftskip + % will keep entries from bumping into each other. Table will start at + % left margin and final column will justify at right margin. + % + % Make sure we don't inherit \rightskip from the outer environment. + \rightskip=0pt + \ifnum\colcount=1 + % The first column will be indented with the surrounding text. + \advance\hsize by\leftskip + \else + \ifsetpercent \else + % If user has not set preamble in terms of percent of \hsize + % we will advance \hsize by \multitablecolspace. + \advance\hsize by \multitablecolspace + \fi + % In either case we will make \leftskip=\multitablecolspace: + \leftskip=\multitablecolspace + \fi + % Ignoring space at the beginning and end avoids an occasional spurious + % blank line, when TeX decides to break the line at the space before the + % box from the multistrut, so the strut ends up on a line by itself. 
+ % For example: + % @multitable @columnfractions .11 .89 + % @item @code{#} + % @tab Legal holiday which is valid in major parts of the whole country. + % Is automatically provided with highlighting sequences respectively + % marking characters. + \noindent\ignorespaces##\unskip\multistrut + }\cr +} +\def\Emultitable{% + \crcr + \egroup % end the \halign + \global\setpercentfalse +} + +\def\setmultitablespacing{% + \def\multistrut{\strut}% just use the standard line spacing + % + % Compute \multitablelinespace (if not defined by user) for use in + % \multitableparskip calculation. We used to define \multistrut based on + % this, but (ironically) that caused the spacing to be off. + % See bug-texinfo report from Werner Lemberg, 31 Oct 2004 12:52:20 +0100. +\ifdim\multitablelinespace=0pt +\setbox0=\vbox{X}\global\multitablelinespace=\the\baselineskip +\global\advance\multitablelinespace by-\ht0 +\fi +% Test to see if parskip is larger than space between lines of +% table. If not, do nothing. +% If so, set it to multitablelinespace less 7pt (see below). +\ifdim\multitableparskip>\multitablelinespace +\global\multitableparskip=\multitablelinespace +\global\advance\multitableparskip-7pt % to keep parskip somewhat smaller + % than skip between lines in the table. +\fi% +\ifdim\multitableparskip=0pt +\global\multitableparskip=\multitablelinespace +\global\advance\multitableparskip-7pt % to keep parskip somewhat smaller + % than skip between lines in the table. +\fi} + + +\message{conditionals,} + +% @iftex, @ifnotdocbook, @ifnothtml, @ifnotinfo, @ifnotplaintext, +% @ifnotxml always succeed. They currently do nothing; we don't +% attempt to check whether the conditionals are properly nested. But we +% have to remember that they are conditionals, so that @end doesn't +% attempt to close an environment group. 
+%
+% \makecond{NAME} makes \NAME a no-op (\relax) and \let's the control
+% sequence \iscond.NAME to the character token `1' as the marker that
+% NAME is a conditional.
+\def\makecond#1{%
+  \expandafter\let\csname #1\endcsname = \relax
+  \expandafter\let\csname iscond.#1\endcsname = 1
+}
+\makecond{iftex}
+\makecond{ifnotdocbook}
+\makecond{ifnothtml}
+\makecond{ifnotinfo}
+\makecond{ifnotplaintext}
+\makecond{ifnotxml}
+
+% Ignore @ignore, @ifhtml, @ifinfo, and the like.
+% Each of these simply calls \doignore with its own name, so that the
+% matching `@end NAME' can be found.
+%
+\def\direntry{\doignore{direntry}}
+\def\documentdescription{\doignore{documentdescription}}
+\def\docbook{\doignore{docbook}}
+\def\html{\doignore{html}}
+\def\ifdocbook{\doignore{ifdocbook}}
+\def\ifhtml{\doignore{ifhtml}}
+\def\ifinfo{\doignore{ifinfo}}
+\def\ifnottex{\doignore{ifnottex}}
+\def\ifplaintext{\doignore{ifplaintext}}
+\def\ifxml{\doignore{ifxml}}
+\def\ignore{\doignore{ignore}}
+\def\menu{\doignore{menu}}
+\def\xml{\doignore{xml}}
+
+% Ignore text until a line `@end #1', keeping track of nested conditionals.
+%
+% A count to remember the depth of nesting.
+\newcount\doignorecount
+
+% \doignore{NAME}: open a group (closed again by \enddoignore), switch to
+% a verbatim-like catcode regime, reset the nesting counter and start
+% scanning with \dodoignore.
+\def\doignore#1{\begingroup
+  % Scan in ``verbatim'' mode:
+  \obeylines
+  \catcode`\@ = \other
+  \catcode`\{ = \other
+  \catcode`\} = \other
+  %
+  % Make sure that spaces turn into tokens that match what \doignoretext wants.
+  \spaceisspace
+  %
+  % Count number of #1's that we've seen.
+  \doignorecount = 0
+  %
+  % Swallow text until we reach the matching `@end #1'.
+  \dodoignore{#1}%
+}
+
+% NB: inside the group below \obeylines is in force, so every code line
+% must end with `%' to hide its line end; only whole-line comments may be
+% added here.
+{ \catcode`_=11 % We want to use \_STOP_ which cannot appear in texinfo source.
+  \obeylines %
+  %
+  \gdef\dodoignore#1{%
+    % #1 contains the command name as a string, e.g., `ifinfo'.
+    %
+    % Define a command to find the next `@end #1'.
+    \long\def\doignoretext##1^^M@end #1{%
+      \doignoretextyyy##1^^M@#1\_STOP_}%
+    %
+    % And this command to find another #1 command, at the beginning of a
+    % line. (Otherwise, we would consider a line `@c @ifset', for
+    % example, to count as an @ifset for nesting.)
+    \long\def\doignoretextyyy##1^^M@#1##2\_STOP_{\doignoreyyy{##2}\_STOP_}%
+    %
+    % And now expand that command.
+    \doignoretext ^^M%
+  }%
+}
+
+% \doignoreyyy: #1 is what \doignoretextyyy found after a line-initial
+% `@NAME' in the text scanned so far.  It is empty when the only match
+% was the sentinel `@NAME' that \doignoretext appended before \_STOP_.
+\def\doignoreyyy#1{%
+  \def\temp{#1}%
+  \ifx\temp\empty                       % Nothing found.
+    \let\next\doignoretextzzz
+  \else                                 % Found a nested condition, ...
+    \advance\doignorecount by 1
+    \let\next\doignoretextyyy           % ..., look for another.
+    % If we're here, #1 ends with ^^M\ifinfo (for example).
+  \fi
+  \next #1% the token \_STOP_ is present just after this macro.
+}
+
+% We have to swallow the remaining "\_STOP_".
+% (It is absorbed as the argument #1 of \doignoretextzzz and dropped.)
+%
+\def\doignoretextzzz#1{%
+  \ifnum\doignorecount = 0      % We have just found the outermost @end.
+    \let\next\enddoignore
+  \else                         % Still inside a nested condition.
+    \advance\doignorecount by -1
+    \let\next\doignoretext      % Look for the next @end.
+  \fi
+  \next
+}
+
+% Finish off ignored text.
+{ \obeylines%
+  % Ignore anything after the last `@end #1'; this matters in verbatim
+  % environments, where otherwise the newline after an ignored conditional
+  % would result in a blank line in the output.
+  \gdef\enddoignore#1^^M{\endgroup\ignorespaces}%
+}
+
+
+% @set VAR sets the variable VAR to an empty value.
+% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE.
+% The value is stored globally in the control sequence \SETVAR.
+%
+% Since we want to separate VAR from REST-OF-LINE (which might be
+% empty), we can't just use \parsearg; we have to insert a space of our
+% own to delimit the rest of the line, and then take it out again if we
+% didn't need it.
+% We rely on the fact that \parsearg sets \catcode`\ =10.
+%
+\parseargdef\set{\setyyy#1 \endsetyyy}
+\def\setyyy#1 #2\endsetyyy{%
+  {%
+    \makevalueexpandable
+    \def\temp{#2}%
+    % \next becomes `\gdef\SETVAR', still waiting for its {replacement text}.
+    \edef\next{\gdef\makecsname{SET#1}}%
+    \ifx\temp\empty
+      \next{}%
+    \else
+      \setzzz#2\endsetzzz
+    \fi
+  }%
+}
+% Remove the trailing space that \set inserted.
+\def\setzzz#1 \endsetzzz{\next{#1}}
+
+% @clear VAR clears (i.e., unsets) the variable VAR.
+% This is done by globally \let'ting \SETVAR to \relax.
+%
+\parseargdef\clear{%
+  {%
+    \makevalueexpandable
+    \global\expandafter\let\csname SET#1\endcsname=\relax
+  }%
+}
+
+% @value{foo} gets the text saved in variable foo.
+% \value opens a group and calls \makevalueexpandable; \valuexxx then
+% reads the {VAR} argument, expands it through \expandablevalue and
+% closes the group again.
+\def\value{\begingroup\makevalueexpandable\valuexxx}
+\def\valuexxx#1{\expandablevalue{#1}\endgroup}
+% - and _ are made active inside this group so that the \let's at the end
+% of \makevalueexpandable refer to the active characters.
+{
+  \catcode`\-=\active \catcode`\_=\active
+  %
+  \gdef\makevalueexpandable{%
+    \let\value = \expandablevalue
+    % We don't want these characters active, ...
+    \catcode`\-=\other \catcode`\_=\other
+    % ..., but we might end up with active ones in the argument if
+    % we're called from @code, as @code{@value{foo-bar_}}, though.
+    % So \let them to their normal equivalents.
+    \let-\normaldash \let_\normalunderscore
+  }
+}
+
+% We have this subroutine so that we can handle at least some @value's
+% properly in indexes (we call \makevalueexpandable in \indexdummies).
+% NOTE(review): \indexdummies and \definedummies as defined in this file
+% \let \value to \dummyvalue rather than calling \makevalueexpandable
+% themselves; confirm whether the parenthetical above is still accurate.
+% The command has to be fully expandable (if the variable is set), since
+% the result winds up in the index file. This means that if the
+% variable's value contains other Texinfo commands, it's almost certain
+% it will fail (although perhaps we could fix that with sufficient work
+% to do a one-level expansion on the result, instead of complete).
+%
+% Unfortunately, this has the consequence that when _ is in the *value*
+% of an @set, it does not print properly in the roman fonts (get the cmr
+% dot accent at position 126 instead). No fix comes to mind, and it's
+% been this way since 2003 or earlier, so just ignore it.
+%
+% If \SETVAR is \relax (or undefined, which \csname turns into \relax), a
+% placeholder is typeset and a warning is written with \message;
+% otherwise the stored value is inserted.
+%
+\def\expandablevalue#1{%
+  \expandafter\ifx\csname SET#1\endcsname\relax
+    {[No value for ``#1'']}%
+    \message{Variable `#1', used in @value, is not set.}%
+  \else
+    \csname SET#1\endcsname
+  \fi
+}
+
+% Like \expandablevalue, but completely expandable (the \message in the
+% definition above operates at the execution level of TeX). Used when
+% writing to auxiliary files, due to the expansion that \write does.
+% If flag is undefined, pass through an unexpanded @value command: maybe it
+% will be set by the time it is read back in.
+%
+% NB flag names containing - or _ may not work here.
+% \dummyvalue{VAR}: the stored value if \SETVAR is set, otherwise an
+% unexpanded \value{VAR}.
+\def\dummyvalue#1{%
+  \expandafter\ifx\csname SET#1\endcsname\relax
+    \noexpand\value{#1}%
+  \else
+    \csname SET#1\endcsname
+  \fi
+}
+
+% Used for @value's in index entries to form the sort key: expand the @value
+% if possible, otherwise sort late (the key becomes `ZZZZZZZ').
+\def\indexnofontsvalue#1{%
+  \expandafter\ifx\csname SET#1\endcsname\relax
+    ZZZZZZZ
+  \else
+    \csname SET#1\endcsname
+  \fi
+}
+
+% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined
+% with @set.
+%
+% To get the special treatment we need for `@end ifset,' we call
+% \makecond and then redefine.
+%
+\makecond{ifset}
+\def\ifset{\parsearg{\doifset{\let\next=\ifsetfail}}}
+% \doifset{CODE}{VAR}: CODE is placed inside the `\ifx \SETVAR \relax'
+% test, so it runs when VAR is not set (callers may start CODE with \else
+% to invert that).  \next is set inside a group; the trailing \expandafter
+% expands \next before the group closes, so the chosen action runs after
+% the group has ended.
+\def\doifset#1#2{%
+  {%
+    \makevalueexpandable
+    \let\next=\empty
+    \expandafter\ifx\csname SET#2\endcsname\relax
+      #1% If not set, redefine \next.
+    \fi
+    \expandafter
+  }\next
+}
+\def\ifsetfail{\doignore{ifset}}
+
+% @ifclear VAR ... @end ifclear executes the `...' iff VAR has never been
+% defined with @set, or has been undefined with @clear.
+%
+% The `\else' inside the `\doifset' parameter is a trick to reuse the
+% above code: if the variable is not set, do nothing, if it is set,
+% then redefine \next to \ifclearfail.
+%
+\makecond{ifclear}
+\def\ifclear{\parsearg{\doifset{\else \let\next=\ifclearfail}}}
+\def\ifclearfail{\doignore{ifclear}}
+
+% @ifcommanddefined CMD ... @end ifcommanddefined executes the `...' if
+% CMD (written without the @) is in fact defined. We can only feasibly
+% check at the TeX level, so something like `mathcode' is going to be
+% considered defined even though it is not a Texinfo command.
+%
+\makecond{ifcommanddefined}
+\def\ifcommanddefined{\parsearg{\doifcmddefined{\let\next=\ifcmddefinedfail}}}
+%
+% \doifcmddefined{CODE}{CMD}: same scheme as \doifset, but the test is on
+% \CMD itself instead of \SETCMD.
+\def\doifcmddefined#1#2{{%
+    \makevalueexpandable
+    \let\next=\empty
+    \expandafter\ifx\csname #2\endcsname\relax
+      #1% If not defined, \let\next as above.
+    \fi
+    \expandafter
+  }\next
+}
+\def\ifcmddefinedfail{\doignore{ifcommanddefined}}
+
+% @ifcommandnotdefined CMD ... handled similarly to @ifclear above.
+\makecond{ifcommandnotdefined}
+\def\ifcommandnotdefined{%
+  \parsearg{\doifcmddefined{\else \let\next=\ifcmdnotdefinedfail}}}
+\def\ifcmdnotdefinedfail{\doignore{ifcommandnotdefined}}
+
+% Set the `txicommandconditionals' variable, so documents have a way to
+% test if the @ifcommand...defined conditionals are available.
+\set txicommandconditionals
+
+% @dircategory CATEGORY -- specify a category of the dir file
+% which this file should belong to. Ignore this in TeX.
+\let\dircategory=\comment
+
+% @definfoenclose.  Likewise ignored in TeX (treated as a comment).
+\let\definfoenclose=\comment
+
+
+\message{indexing,}
+% Index generation facilities
+
+% Define \newwrite to be identical to plain tex's \newwrite
+% except not \outer, so it can be used within macros and \if's.
+\edef\newwrite{\makecsname{ptexnewwrite}}
+
+% \newindex{IX} defines an index named IX.
+% It automatically defines \IXindex such that
+% \IXindex ...rest of line... puts an entry in the index IX.
+% It also defines \IXindfile, which will hold the number of the output
+% channel for the file that accumulates this index.  It is initialised
+% to 0 here, meaning ``not opened yet''; \requireopenindexfile allocates
+% the stream and opens the file the first time it is needed.
+% The file's extension is IX.
+% The name of an index should be no more than 2 characters long
+% for the sake of vms.
+%
+\def\newindex#1{%
+  \expandafter\chardef\csname#1indfile\endcsname=0
+  \expandafter\xdef\csname#1index\endcsname{%     % Define @#1index
+    \noexpand\doindex{#1}}
+}
+
+% @defindex foo == \newindex{foo}
+%
+\def\defindex{\parsearg\newindex}
+
+% Define @defcodeindex, like @defindex except put all entries in @code.
+%
+\def\defcodeindex{\parsearg\newcodeindex}
+%
+% Same as \newindex, except that \IXindex expands to \docodeindex{IX}.
+\def\newcodeindex#1{%
+  \expandafter\chardef\csname#1indfile\endcsname=0
+  \expandafter\xdef\csname#1index\endcsname{%
+    \noexpand\docodeindex{#1}}%
+}
+
+% The default indices:
+\newindex{cp}%      concepts,
+\newcodeindex{fn}%  functions,
+\newcodeindex{vr}%  variables,
+\newcodeindex{tp}%  types,
+\newcodeindex{ky}%  keys
+\newcodeindex{pg}%  and programs.
+
+
+% @synindex foo bar makes index foo feed into index bar.
+% Do this instead of @defindex foo if you don't want it as a separate index.
+%
+% @syncodeindex foo bar similar, but put all entries made for index foo
+% inside @code.
+%
+% Both arguments are delimited by a space in the parameter text, so the
+% second index name must be followed by a space token as well.
+%
+\def\synindex#1 #2 {\dosynindex\doindex{#1}{#2}}
+\def\syncodeindex#1 #2 {\dosynindex\docodeindex{#1}{#2}}
+
+% #1 is \doindex or \docodeindex, #2 the index getting redefined (foo),
+% #3 the target index (bar).
+% The target's file is opened first, so that \fooindfile is aliased to
+% the real output stream number rather than to the initial 0.
+\def\dosynindex#1#2#3{%
+  \requireopenindexfile{#3}%
+  % redefine \fooindfile:
+  \expandafter\let\expandafter\temp\expandafter=\csname#3indfile\endcsname
+  \expandafter\let\csname#2indfile\endcsname=\temp
+  % redefine \fooindex:
+  \expandafter\xdef\csname#2index\endcsname{\noexpand#1{#3}}%
+}
+
+% Define \doindex, the driver for all index macros.
+% Argument #1 is generated by the calling \fooindex macro,
+% and it is the two-letter name of the index.
+% The name is saved in \indexname; \doindexxxx then receives the entry
+% text from \parsearg and passes both on to \doind.
+
+\def\doindex#1{\edef\indexname{#1}\parsearg\doindexxxx}
+\def\doindexxxx #1{\doind{\indexname}{#1}}
+
+% Like the previous two, but they put @code around the argument.
+\def\docodeindex#1{\edef\indexname{#1}\parsearg\docodeindexxxx}
+\def\docodeindexxxx #1{\doind{\indexname}{\code{#1}}}
+
+
+% Used when writing an index entry out to an index file to prevent
+% expansion of Texinfo commands that can appear in an index entry.
+%
+\def\indexdummies{%
+  \escapechar = `\\     % use backslash in output files.
+  \definedummyletter\@%
+  \definedummyletter\ %
+  %
+  % For texindex which always views { and } as separators.
+  \def\{{\lbracechar{}}%
+  \def\}{\rbracechar{}}%
+  %
+  % Do the redefinitions.
+  \definedummies
+}
+
+% Used for the aux and toc files, where @ is the escape character.
+%
+\def\atdummies{%
+  \definedummyletter\@%
+  \definedummyletter\ %
+  \definedummyletter\{%
+  \definedummyletter\}%
+  %
+  % Do the redefinitions.
+  \definedummies
+  \otherbackslash
+}
+
+% \definedummyword defines \#1 as \string\#1\space, thus effectively
+% preventing its expansion. This is used only for control words,
+% not control letters, because the \space would be incorrect for
+% control characters, but is needed to separate the control word
+% from whatever follows.
+%
+% These can be used both for control words that take an argument and
+% those that do not. If it is followed by {arg} in the input, then
+% that will dutifully get written to the index (or wherever).
+%
+% For control letters, we have \definedummyletter, which omits the
+% space.  \definedummyaccent is simply an alias for \definedummyletter.
+%
+\def\definedummyword #1{\def#1{\string#1\space}}%
+\def\definedummyletter#1{\def#1{\string#1}}%
+\let\definedummyaccent\definedummyletter
+
+% Called from \indexdummies and \atdummies, to effectively prevent
+% the expansion of commands.
+%
+\def\definedummies{%
+  %
+  % Select the ``dummy'' flavour of the three hooks used by
+  % \commondummiesnofonts, then run the shared list.
+  \let\commondummyword\definedummyword
+  \let\commondummyletter\definedummyletter
+  \let\commondummyaccent\definedummyaccent
+  \commondummiesnofonts
+  %
+  \definedummyletter\_%
+  \definedummyletter\-%
+  %
+  % Non-English letters.
+  \definedummyword\AA
+  \definedummyword\AE
+  \definedummyword\DH
+  \definedummyword\L
+  \definedummyword\O
+  \definedummyword\OE
+  \definedummyword\TH
+  \definedummyword\aa
+  \definedummyword\ae
+  \definedummyword\dh
+  \definedummyword\exclamdown
+  \definedummyword\l
+  \definedummyword\o
+  \definedummyword\oe
+  \definedummyword\ordf
+  \definedummyword\ordm
+  \definedummyword\questiondown
+  \definedummyword\ss
+  \definedummyword\th
+  %
+  % Although these internal commands shouldn't show up, sometimes they do.
+  \definedummyword\bf
+  \definedummyword\gtr
+  \definedummyword\hat
+  \definedummyword\less
+  \definedummyword\sf
+  \definedummyword\sl
+  \definedummyword\tclose
+  \definedummyword\tt
+  %
+  \definedummyword\LaTeX
+  \definedummyword\TeX
+  %
+  % Assorted special characters.
+  \definedummyword\atchar
+  \definedummyword\arrow
+  \definedummyword\bullet
+  \definedummyword\comma
+  \definedummyword\copyright
+  \definedummyword\registeredsymbol
+  \definedummyword\dots
+  \definedummyword\enddots
+  \definedummyword\entrybreak
+  \definedummyword\equiv
+  \definedummyword\error
+  \definedummyword\euro
+  \definedummyword\expansion
+  \definedummyword\geq
+  \definedummyword\guillemetleft
+  \definedummyword\guillemetright
+  \definedummyword\guilsinglleft
+  \definedummyword\guilsinglright
+  \definedummyword\lbracechar
+  \definedummyword\leq
+  \definedummyword\mathopsup
+  \definedummyword\minus
+  \definedummyword\ogonek
+  \definedummyword\pounds
+  \definedummyword\point
+  \definedummyword\print
+  \definedummyword\quotedblbase
+  \definedummyword\quotedblleft
+  \definedummyword\quotedblright
+  \definedummyword\quoteleft
+  \definedummyword\quoteright
+  \definedummyword\quotesinglbase
+  \definedummyword\rbracechar
+  \definedummyword\result
+  \definedummyword\sub
+  \definedummyword\sup
+  \definedummyword\textdegree
+  %
+  % We want to disable all macros so that they are not expanded by \write.
+  \macrolist
+  \let\value\dummyvalue
+  %
+  \normalturnoffactive
+}
+
+% \commondummiesnofonts: common to \definedummies and \indexnofonts.
+% Define \commondummyletter, \commondummyaccent and \commondummyword before
+% using. Used for accents, font commands, and various control letters.
+%
+\def\commondummiesnofonts{%
+  % Control letters and accents.
+  \commondummyletter\!%
+  \commondummyaccent\"%
+  \commondummyaccent\'%
+  \commondummyletter\*%
+  \commondummyaccent\,%
+  \commondummyletter\.%
+  \commondummyletter\/%
+  \commondummyletter\:%
+  \commondummyaccent\=%
+  \commondummyletter\?%
+  \commondummyaccent\^%
+  \commondummyaccent\`%
+  \commondummyaccent\~%
+  \commondummyword\u
+  \commondummyword\v
+  \commondummyword\H
+  \commondummyword\dotaccent
+  \commondummyword\ogonek
+  \commondummyword\ringaccent
+  \commondummyword\tieaccent
+  \commondummyword\ubaraccent
+  \commondummyword\udotaccent
+  \commondummyword\dotless
+  %
+  % Texinfo font commands.
+  \commondummyword\b
+  \commondummyword\i
+  \commondummyword\r
+  \commondummyword\sansserif
+  \commondummyword\sc
+  \commondummyword\slanted
+  \commondummyword\t
+  %
+  % Commands that take arguments.
+  \commondummyword\abbr
+  \commondummyword\acronym
+  \commondummyword\anchor
+  \commondummyword\cite
+  \commondummyword\code
+  \commondummyword\command
+  \commondummyword\dfn
+  \commondummyword\dmn
+  \commondummyword\email
+  \commondummyword\emph
+  \commondummyword\env
+  \commondummyword\file
+  \commondummyword\image
+  \commondummyword\indicateurl
+  \commondummyword\inforef
+  \commondummyword\kbd
+  \commondummyword\key
+  \commondummyword\math
+  \commondummyword\option
+  \commondummyword\pxref
+  \commondummyword\ref
+  \commondummyword\samp
+  \commondummyword\strong
+  \commondummyword\tie
+  \commondummyword\U
+  \commondummyword\uref
+  \commondummyword\url
+  \commondummyword\var
+  \commondummyword\verb
+  \commondummyword\w
+  \commondummyword\xref
+}
+
+% For testing: output @{ and @} in index sort strings as \{ and \}.
+\newif\ifusebracesinindexes
+
+% \indexlbrace and \indexrbrace start out as \relax; \printindex gives
+% them their printing definitions when an index is read back in.
+\let\indexlbrace\relax
+\let\indexrbrace\relax
+
+% \backslashdisappear makes the active backslash expand to nothing.
+% Inside the group @ is the escape character and \ is active, which is
+% why the definition is written with @gdef and @def.
+{\catcode`\@=0
+\catcode`\\=13
+  @gdef@backslashdisappear{@def\{}}
+}
+
+% \indexnonalnumdisappear: for each txiindex...ignore flag that has been
+% @set, make the corresponding character (left quote, backslash, hyphen,
+% less-than, @) expand to nothing.  \indexnonalnumreappear restores the
+% printing definitions of backslash, hyphen, less-than and @.
+% <, - and ` are active inside the group so that they can be redefined.
+{
+\catcode`\<=13
+\catcode`\-=13
+\catcode`\`=13
+  \gdef\indexnonalnumdisappear{%
+    \expandafter\ifx\csname SETtxiindexlquoteignore\endcsname\relax\else
+      % @set txiindexlquoteignore makes us ignore left quotes in the sort term.
+      % (Introduced for FSFS 2nd ed.)
+      \let`=\empty
+    \fi
+    %
+    \expandafter\ifx\csname SETtxiindexbackslashignore\endcsname\relax\else
+      \backslashdisappear
+    \fi
+    %
+    \expandafter\ifx\csname SETtxiindexhyphenignore\endcsname\relax\else
+      \def-{}%
+    \fi
+    \expandafter\ifx\csname SETtxiindexlessthanignore\endcsname\relax\else
+      \def<{}%
+    \fi
+    \expandafter\ifx\csname SETtxiindexatsignignore\endcsname\relax\else
+      \def\@{}%
+    \fi
+  }
+
+  \gdef\indexnonalnumreappear{%
+    \useindexbackslash
+    \let-\normaldash
+    \let<\normalless
+    \def\@{@}%
+  }
+}
+
+
+% \indexnofonts is used when outputting the strings to sort the index
+% by, and when constructing control sequence names. It eliminates all
+% control sequences and just writes whatever the best ASCII sort string
+% would be for a given command (usually its argument).
+%
+\def\indexnofonts{%
+  % Accent commands should become @asis.
+  \def\commondummyaccent##1{\let##1\asis}%
+  % We can just ignore other control letters.
+  \def\commondummyletter##1{\let##1\empty}%
+  % All control words become @asis by default; overrides below.
+  \let\commondummyword\commondummyaccent
+  \commondummiesnofonts
+  %
+  % Don't no-op \tt, since it isn't a user-level command
+  % and is used in the definitions of the active chars like <, >, |, etc.
+  % Likewise with the other plain tex font commands.
+  %\let\tt=\asis
+  %
+  \def\ { }%
+  \def\@{@}%
+  \def\_{\normalunderscore}%
+  \def\-{}% @- shouldn't affect sorting
+  %
+  % The \uccode/\uppercase pairs turn the `1' in each replacement text
+  % into a brace character, so that \{ and \} expand to character tokens
+  % that do not act as group delimiters.
+  \uccode`\1=`\{ \uppercase{\def\{{1}}%
+  \uccode`\1=`\} \uppercase{\def\}{1}}%
+  \let\lbracechar\{%
+  \let\rbracechar\}%
+  %
+  % Non-English letters.
+  \def\AA{AA}%
+  \def\AE{AE}%
+  \def\DH{DZZ}%
+  \def\L{L}%
+  \def\OE{OE}%
+  \def\O{O}%
+  \def\TH{TH}%
+  \def\aa{aa}%
+  \def\ae{ae}%
+  \def\dh{dzz}%
+  \def\exclamdown{!}%
+  \def\l{l}%
+  \def\oe{oe}%
+  \def\ordf{a}%
+  \def\ordm{o}%
+  \def\o{o}%
+  \def\questiondown{?}%
+  \def\ss{ss}%
+  \def\th{th}%
+  %
+  \def\LaTeX{LaTeX}%
+  \def\TeX{TeX}%
+  %
+  % Assorted special characters. \defglyph gives the control sequence a
+  % definition that removes the {} that follows its use.
+  \defglyph\atchar{@}%
+  \defglyph\arrow{->}%
+  \defglyph\bullet{bullet}%
+  \defglyph\comma{,}%
+  \defglyph\copyright{copyright}%
+  \defglyph\dots{...}%
+  \defglyph\enddots{...}%
+  \defglyph\equiv{==}%
+  \defglyph\error{error}%
+  \defglyph\euro{euro}%
+  \defglyph\expansion{==>}%
+  \defglyph\geq{>=}%
+  \defglyph\guillemetleft{<<}%
+  \defglyph\guillemetright{>>}%
+  \defglyph\guilsinglleft{<}%
+  \defglyph\guilsinglright{>}%
+  \defglyph\leq{<=}%
+  \defglyph\lbracechar{\{}%
+  \defglyph\minus{-}%
+  \defglyph\point{.}%
+  \defglyph\pounds{pounds}%
+  \defglyph\print{-|}%
+  \defglyph\quotedblbase{"}%
+  \defglyph\quotedblleft{"}%
+  \defglyph\quotedblright{"}%
+  \defglyph\quoteleft{`}%
+  \defglyph\quoteright{'}%
+  \defglyph\quotesinglbase{,}%
+  \defglyph\rbracechar{\}}%
+  \defglyph\registeredsymbol{R}%
+  \defglyph\result{=>}%
+  \defglyph\textdegree{o}%
+  %
+  % We need to get rid of all macros, leaving only the arguments (if present).
+  % Of course this is not nearly correct, but it is the best we can do for now.
+  % makeinfo does not expand macros in the argument to @deffn, which ends up
+  % writing an index entry, and texindex isn't prepared for an index sort entry
+  % that starts with \.
+  %
+  % Since macro invocations are followed by braces, we can just redefine them
+  % to take a single TeX argument. The case of a macro invocation that
+  % goes to end-of-line is not handled.
+  %
+  \macrolist
+  \let\value\indexnofontsvalue
+}
+% \defglyph\cs{TEXT}: define \cs to take one argument, discard it, and
+% expand to TEXT.
+\def\defglyph#1#2{\def#1##1{#2}} % see above
+
+
+
+
+\let\SETmarginindex=\relax % put index entries in margin (undocumented)?
+
+% Most index entries go through here, but \dosubind is the general case.
+% #1 is the index name, #2 is the entry text.
+\def\doind#1#2{\dosubind{#1}{#2}{}}
+
+% There is also \dosubind {index}{topic}{subtopic}
+% which makes an entry in a two-level index such as the operation index.
+% TODO: Two-level index? Operation index?
+
+% Workhorse for all indexes.
+% #1 is name of index, #2 is stuff to put there, #3 is subentry --
+% empty if called from \doind, as we usually are (the main exception
+% is with most defuns, which call us directly).
+%
+% Nothing at all is done unless \iflinks is true.  The entry text is left
+% in \toks0 and the output stream in \writeto for \dosubindwrite, which
+% is run through \safewhatsit.
+%
+\def\dosubind#1#2#3{%
+  \iflinks
+  {%
+    \requireopenindexfile{#1}%
+    % Store the main index entry text (including the third arg).
+    \toks0 = {#2}%
+    % If third arg is present, precede it with a space.
+    \def\thirdarg{#3}%
+    \ifx\thirdarg\empty \else
+      \toks0 = \expandafter{\the\toks0 \space #3}%
+    \fi
+    %
+    \edef\writeto{\csname#1indfile\endcsname}%
+    %
+    \safewhatsit\dosubindwrite
+  }%
+  \fi
+}
+
+% Check if an index file has been opened, and if not, open it.
+% ``Not opened'' means that \#1indfile still has the value 0 given to it
+% by \newindex or \newcodeindex.
+\def\requireopenindexfile#1{%
+\ifnum\csname #1indfile\endcsname=0
+  \expandafter\newwrite \csname#1indfile\endcsname
+  \edef\suffix{#1}%
+  % A .fls suffix would conflict with the file extension for the output
+  % of -recorder, so use .f1s instead.
+  \ifx\suffix\indexisfl\def\suffix{f1}\fi
+  % Open the file
+  \immediate\openout\csname#1indfile\endcsname \jobname.\suffix
+  % Using \immediate above here prevents an object entering into the current
+  % box, which could confound checks such as those in \safewhatsit for
+  % preceding skips.
+  \typeout{Writing index file \jobname.\suffix}%
+\fi}
+\def\indexisfl{fl}
+
+% Output \ as {\indexbackslash}, because \ is an escape character in
+% the index files.
+\let\indexbackslash=\relax
+% \useindexbackslash makes the active backslash expand to
+% {\indexbackslash}.  Inside the group @ is the escape character and \ is
+% active, which is why the definition is written with @gdef and @def.
+{\catcode`\@=0 \catcode`\\=\active
+  @gdef@useindexbackslash{@def\{{@indexbackslash}}}
+}
+
+% Definition for writing index entry text: @sortas{KEY} contributes
+% nothing to the printed entry; the argument is dropped and following
+% spaces are skipped.
+\def\sortas#1{\ignorespaces}%
+
+% Definition for writing index entry sort key. Should occur at
+% the beginning of the index entry, like
+% @cindex @sortas{september} \september
+% The \ignorespaces takes care of following space, but there's no way
+% to remove space before it.
+% \indexwritesortas opens a group and restores the printing definitions
+% with \indexnonalnumreappear; \indexwritesortasxxx then stores the
+% expanded argument globally in \indexsortkey and closes the group.
+{
+\catcode`\-=13
+\gdef\indexwritesortas{%
+  \begingroup
+  \indexnonalnumreappear
+  \indexwritesortasxxx}
+\gdef\indexwritesortasxxx#1{%
+  \xdef\indexsortkey{#1}\endgroup}
+}
+
+
+% Write the entry in \toks0 to the index file.
+% Expects \writeto to hold the output stream (both are set up by
+% \dosubind, which also supplies the enclosing group).
+%
+\def\dosubindwrite{%
+  % Put the index entry in the margin if desired.
+  \ifx\SETmarginindex\relax\else
+    \insert\margin{\hbox{\vrule height8pt depth3pt width0pt \the\toks0}}%
+  \fi
+  %
+  % Remember, we are within a group.
+  \indexdummies % Must do this here, since \bf, etc expand at this stage
+  \useindexbackslash % \indexbackslash isn't defined now so it will be output
+                     % as is; and it will print as backslash.
+  % The braces around \indexbrace are recognized by texindex.
+  %
+  % Get the string to sort by, by processing the index entry with all
+  % font commands turned off.
+  {\indexnofonts
+   \def\lbracechar{{\indexlbrace}}%
+   \def\rbracechar{{\indexrbrace}}%
+   \let\{=\lbracechar
+   \let\}=\rbracechar
+   \indexnonalnumdisappear
+   \xdef\indexsortkey{}%
+   \let\sortas=\indexwritesortas
+   \edef\temp{\the\toks0}%
+   \setbox\dummybox = \hbox{\temp}% Make sure to execute any \sortas
+   % If no \sortas supplied a key, use the stripped entry text itself,
+   % and fall back to a single space if even that is empty.
+   \ifx\indexsortkey\empty
+     \xdef\indexsortkey{\temp}%
+     \ifx\indexsortkey\empty\xdef\indexsortkey{ }\fi
+   \fi
+  }%
+  %
+  % Set up the complete index entry, with both the sort key and
+  % the original text, including any font commands. We write
+  % three arguments to \entry to the .?? file (four in the
+  % subentry case), texindex reduces to two when writing the .??s
+  % sorted result.
+  \edef\temp{%
+    \write\writeto{%
+      \string\entry{\indexsortkey}{\noexpand\folio}{\the\toks0}}%
+  }%
+  \temp
+}
+\newbox\dummybox % used above
+
+% Take care of unwanted page breaks/skips around a whatsit:
+%
+% If a skip is the last thing on the list now, preserve it
+% by backing up by \lastskip, doing the \write, then inserting
+% the skip again. Otherwise, the whatsit generated by the
+% \write or \pdfdest will make \lastskip zero. The result is that
+% sequences like this:
+%   @end defun
+%   @tindex whatever
+%   @defun ...
+% will have extra space inserted, because the \medbreak in the
+% start of the @defun won't see the skip inserted by the @end of
+% the previous defun.
+%
+% But don't do any of this if we're not in vertical mode. We
+% don't want to do a \vskip and prematurely end a paragraph.
+%
+% Avoid page breaks due to these extra skips, too.
+%
+% But wait, there is a catch there:
+% We'll have to check whether \lastskip is zero skip. \ifdim is not
+% sufficient for this purpose, as it ignores stretch and shrink parts
+% of the skip. The only way seems to be to check the textual
+% representation of the skip.
+%
+% The following is almost like \def\zeroskipmacro{0.0pt} except that
+% the ``p'' and ``t'' characters have catcode \other, not 11 (letter).
+%
+\edef\zeroskipmacro{\expandafter\the\csname z@skip\endcsname}
+%
+\newskip\whatsitskip
+\newcount\whatsitpenalty
+%
+% ..., ready, GO:
+%
+% #1 is the code that produces the whatsit.  In horizontal mode it is
+% simply executed.
+%
+\def\safewhatsit#1{\ifhmode
+  #1%
+ \else
+  % \lastskip and \lastpenalty cannot both be nonzero simultaneously.
+  \whatsitskip = \lastskip
+  \edef\lastskipmacro{\the\lastskip}%
+  \whatsitpenalty = \lastpenalty
+  %
+  % If \lastskip is nonzero, that means the last item was a
+  % skip. And since a skip is discardable, that means this
+  % -\whatsitskip glue we're inserting is preceded by a
+  % non-discardable item, therefore it is not a potential
+  % breakpoint, therefore no \nobreak needed.
+  \ifx\lastskipmacro\zeroskipmacro
+  \else
+    \vskip-\whatsitskip
+  \fi
+  %
+  #1%
+  %
+  \ifx\lastskipmacro\zeroskipmacro
+    % If \lastskip was zero, perhaps the last item was a penalty, and
+    % perhaps it was >=10000, e.g., a \nobreak. In that case, we want
+    % to re-insert the same penalty (values >10000 are used for various
+    % signals); since we just inserted a non-discardable item, any
+    % following glue (such as a \parskip) would be a breakpoint. For example:
+    %   @deffn deffn-whatever
+    %   @vindex index-whatever
+    %   Description.
+    % would allow a break between the index-whatever whatsit
+    % and the "Description." paragraph.
+    \ifnum\whatsitpenalty>9999 \penalty\whatsitpenalty \fi
+  \else
+    % On the other hand, if we had a nonzero \lastskip,
+    % this make-up glue would be preceded by a non-discardable item
+    % (the whatsit from the \write), so we must insert a \nobreak.
+    \nobreak\vskip\whatsitskip
+  \fi
+\fi}
+
+% The index entry written in the file actually looks like
+%  \entry {sortstring}{page}{topic}
+% or
+%  \entry {sortstring}{page}{topic}{subtopic}
+% The texindex program reads in these files and writes files
+% containing these kinds of lines:
+%  \initial {c}
+%     before the first topic whose initial is c
+%  \entry {topic}{pagelist}
+%     for a topic that is used without subtopics
+%  \primary {topic}
+%     for the beginning of a topic that is used with subtopics
+%  \secondary {subtopic}{pagelist}
+%     for each subtopic.
+
+% Define the user-accessible indexing commands
+% @findex, @kindex, @cindex, @vindex, @tindex, @pindex.
+
+\def\findex {\fnindex}
+\def\kindex {\kyindex}
+\def\cindex {\cpindex}
+\def\vindex {\vrindex}
+\def\tindex {\tpindex}
+\def\pindex {\pgindex}
+
+% @cindexsub "SUBTOPIC" TOPIC: make a two-level entry in the concept index.
+% \cindexsub opens a group and turns on \obeylines, so that the end of the
+% line becomes the active ^^M that delimits the second argument of the
+% reader \cindexsubxxx, which closes the group again.
+% The reader must have its own name: when it was also called \cindexsub,
+% its \gdef replaced the wrapper above it, so \obeylines was never turned
+% on and the ^^M delimiter could not match.
+% Inside the \obeylines group every line must end with `%'.
+\def\cindexsub {\begingroup\obeylines\cindexsubxxx}
+{\obeylines %
+\gdef\cindexsubxxx "#1" #2^^M{\endgroup %
+\dosubind{cp}{#2}{#1}}}
+
+% Define the macros used in formatting output of the sorted index material.
+
+% @printindex causes a particular index (the ??s file) to get printed.
+% It does not print any chapter heading (usually an @unnumbered).
+% Everything happens inside one group; input stream 1 is used to read
+% the sorted file and is closed again at the end.
+%
+\parseargdef\printindex{\begingroup
+  \dobreak \chapheadingskip{10000}%
+  %
+  \smallfonts \rm
+  \tolerance = 9500
+  \plainfrenchspacing
+  \everypar = {}% don't want the \kern\-parindent from indentation suppression.
+  %
+  % See if the index file exists and is nonempty.
+  % Change catcode of @ here so that if the index file contains
+  % \initial {@}
+  % as its first line, TeX doesn't complain about mismatched braces
+  % (because it thinks @} is a control sequence).
+  \catcode`\@ = 12
+  % See comment in \requireopenindexfile.
+  \def\indexname{#1}\ifx\indexname\indexisfl\def\indexname{f1}\fi
+  \openin 1 \jobname.\indexname s
+  \ifeof 1
+    % \enddoublecolumns gets confused if there is no text in the index,
+    % and it loses the chapter title and the aux file entries for the
+    % index. The easiest way to prevent this problem is to make sure
+    % there is some text.
+    \putwordIndexNonexistent
+    \typeout{No file \jobname.\indexname s.}%
+  \else
+    \catcode`\\ = 0
+    %
+    % If the index file exists but is empty, then \openin leaves \ifeof
+    % false. We have to make TeX try to read something from the file, so
+    % it can discover if there is anything in it.
+    \read 1 to \thisline
+    \ifeof 1
+      \putwordIndexIsEmpty
+    \else
+      % Index files are almost Texinfo source, but we use \ as the escape
+      % character. It would be better to use @, but that's too big a change
+      % to make right now.
+      \def\indexbackslash{\ttbackslash}%
+      \let\indexlbrace\{ % Likewise, set these sequences for braces
+      \let\indexrbrace\} % used in the sort key.
+      \begindoublecolumns
+      \let\entrywidowpenalty=\indexwidowpenalty
+      %
+      % Read input from the index file line by line.
+      % The loop works one line ahead: \nextline is read, and its first
+      % token recorded globally in \firsttoken, before the current line
+      % \thisline is executed.
+      \loopdo
+        \ifeof1
+          \let\firsttoken\relax
+        \else
+          \read 1 to \nextline
+          \edef\act{\gdef\noexpand\firsttoken{\getfirsttoken\nextline}}%
+          \act
+        \fi
+        \thisline
+        %
+        \ifeof1\else
+        \let\thisline\nextline
+      \repeat
+      %%
+      \enddoublecolumns
+    \fi
+  \fi
+  \closein 1
+\endgroup}
+
+% \getfirsttoken\cs: expand \cs once and yield \noexpand followed by the
+% first token (or brace group) of the result.
+\def\getfirsttoken#1{\expandafter\getfirsttokenx#1\endfirsttoken}
+\long\def\getfirsttokenx#1#2\endfirsttoken{\noexpand#1}
+
+% \loopdo BODY \repeat: BODY is stored in \body and must end with an open
+% \if...; \loopdoxxx supplies the matching \fi and runs \body again for
+% as long as that condition holds.
+\def\loopdo#1\repeat{\def\body{#1}\loopdoxxx}
+\def\loopdoxxx{\let\next=\relax\body\let\next=\loopdoxxx\fi\next}
+
+% These macros are used by the sorted index file itself.
+% Change them to control the appearance of the index.
+
+% \initialglyphs redefines a number of non-alphabetic characters for use
+% in an index initial.  The characters are made active inside this group
+% so that the \def's in the macro body refer to the active characters.
+{\catcode`\/=13 \catcode`\-=13 \catcode`\^=13 \catcode`\~=13 \catcode`\_=13
+\catcode`\|=13 \catcode`\<=13 \catcode`\>=13 \catcode`\+=13 \catcode`\"=13
+\catcode`\$=3
+\gdef\initialglyphs{%
+  % Some changes for non-alphabetic characters. Using the glyphs from the
+  % math fonts looks more consistent than the typewriter font used elsewhere
+  % for these characters.
+  \def\indexbackslash{\math{\backslash}}%
+  \let\\=\indexbackslash
+  %
+  % Can't get bold backslash so don't use bold forward slash
+  \catcode`\/=13
+  \def/{{\secrmnotbold \normalslash}}%
+  \def-{{\normaldash\normaldash}}% en dash `--'
+  \def^{{\chapbf \normalcaret}}%
+  \def~{{\chapbf \normaltilde}}%
+  \def\_{%
+    \leavevmode \kern.07em \vbox{\hrule width.3em height.1ex}\kern .07em }%
+  \def|{$\vert$}%
+  \def<{$\less$}%
+  \def>{$\gtr$}%
+  \def+{$\normalplus$}%
+}}
+
+% \initial{X}: open a group, install \initialglyphs, and let \initialx
+% typeset X; \initialx closes the group again.
+\def\initial{%
+  \bgroup
+  \initialglyphs
+  \initialx
+}
+
+\def\initialx#1{%
+  % Remove any glue we may have, we'll be inserting our own.
+  \removelastskip
+  %
+  % We like breaks before the index initials, so insert a bonus.
+  % The glue before the bonus allows a little bit of space at the
+  % bottom of a column to reduce an increase in inter-line spacing.
+  \nobreak
+  \vskip 0pt plus 5\baselineskip
+  \penalty -300
+  \vskip 0pt plus -5\baselineskip
+  %
+  % Typeset the initial. Making this add up to a whole number of
+  % baselineskips increases the chance of the dots lining up from column
+  % to column. It still won't often be perfect, because of the stretch
+  % we need before each entry, but it's better.
+  %
+  % No shrink because it confuses \balancecolumns.
+  \vskip 1.67\baselineskip plus 1\baselineskip
+  \leftline{\secfonts \kern-0.05em \secbf #1}%
+  % \secfonts is inside the argument of \leftline so that the change of
+  % \baselineskip will not affect any glue inserted before the vbox that
+  % \leftline creates.
+  % Do our best not to break after the initial.
+  \nobreak
+  \vskip .33\baselineskip plus .1\baselineskip
+  \egroup % \initialglyphs
+}
+
+\newdimen\entryrightmargin
+\entryrightmargin=0pt
+
+% \entry typesets a paragraph consisting of the text (#1), dot leaders, and
+% then page number (#2) flushed to the right margin. It is used for index
+% and table of contents entries. The paragraph is indented by \leftskip.
+%
+% The text is not read as a macro argument: \entry swallows its opening
+% brace, \doentry collects the text in \boxA, and \finishentry (run via
+% \aftergroup when the text's closing brace is reached) reads the page
+% number and builds the paragraph.
+%
+\def\entry{%
+  \begingroup
+  %
+  % For pdfTeX and XeTeX.
+  % The redefinition of \domark stops marks being added in \pdflink to
+  % preserve coloured links across page boundaries. Otherwise the marks
+  % would get in the way of \lastbox in \insertindexentrybox.
+  \let\domark\relax
+  %
+  % Start a new paragraph if necessary, so our assignments below can't
+  % affect previous text.
+  \par
+  %
+  % No extra space above this paragraph.
+  \parskip = 0in
+  %
+  % When reading the text of entry, convert explicit line breaks
+  % from @* into spaces. The user might give these in long section
+  % titles, for instance.
+  \def\*{\unskip\space\ignorespaces}%
+  \def\entrybreak{\hfil\break}% An undocumented command
+  %
+  % Swallow the left brace of the text (first parameter):
+  \afterassignment\doentry
+  \let\temp =
+}
+% Outside of \entry, \entrybreak just produces a space.
+\def\entrybreak{\unskip\space\ignorespaces}%
+\def\doentry{%
+    % Save the text of the entry
+    \global\setbox\boxA=\hbox\bgroup
+    \bgroup % Instead of the swallowed brace.
+      \noindent
+      \aftergroup\finishentry
+      % And now comes the text of the entry.
+      % Not absorbing as a macro argument reduces the chance of problems
+      % with catcodes occurring.
+}
+% @ is a letter inside this group so that \dimen@, \dimen@i and \dimen@ii
+% can be used.
+{\catcode`\@=11
+\gdef\finishentry#1{%
+    \egroup % end box A
+    \dimen@ = \wd\boxA % Length of text of entry
+    \global\setbox\boxA=\hbox\bgroup\unhbox\boxA
+    % #1 is the page number.
+    %
+    % Get the width of the page numbers, and only use
+    % leaders if they are present.
+    \global\setbox\boxB = \hbox{#1}%
+    \ifdim\wd\boxB = 0pt
+      \null\nobreak\hfill\ %
+    \else
+      %
+      \null\nobreak\indexdotfill % Have leaders before the page number.
+      %
+      \ifpdf
+        \pdfgettoks#1.%
+        \hskip\skip\thinshrinkable\the\toksA
+      \else
+        \ifx\XeTeXrevision\thisisundefined
+          \hskip\skip\thinshrinkable #1%
+        \else
+          \pdfgettoks#1.%
+          \hskip\skip\thinshrinkable\the\toksA
+        \fi
+      \fi
+    \fi
+    \egroup % end \boxA
+    \ifdim\wd\boxB = 0pt
+      \global\setbox\entryindexbox=\vbox{\unhbox\boxA}%
+    \else
+    \global\setbox\entryindexbox=\vbox\bgroup
+      % We want the text of the entries to be aligned to the left, and the
+      % page numbers to be aligned to the right.
+      %
+      \parindent = 0pt
+      \advance\leftskip by 0pt plus 1fil
+      \advance\leftskip by 0pt plus -1fill
+      \rightskip = 0pt plus -1fil
+      \advance\rightskip by 0pt plus 1fill
+      % Cause last line, which could consist of page numbers on their own
+      % if the list of page numbers is long, to be aligned to the right.
+      \parfillskip=0pt plus -1fill
+      %
+      \advance\rightskip by \entryrightmargin
+      % Determine how far we can stretch into the margin.
+      % This allows, e.g., "Appendix H GNU Free Documentation License" to
+      % fit on one line in @letterpaper format.
+      \ifdim\entryrightmargin>2.1em
+        \dimen@i=2.1em
+      \else
+        \dimen@i=0em
+      \fi
+      \advance \parfillskip by 0pt minus 1\dimen@i
+      %
+      % \dimen@ii: the width available to the entry on a single line.
+      \dimen@ii = \hsize
+      \advance\dimen@ii by -1\leftskip
+      \advance\dimen@ii by -1\entryrightmargin
+      \advance\dimen@ii by 1\dimen@i
+      \ifdim\wd\boxA > \dimen@ii % If the entry doesn't fit in one line
+      \ifdim\dimen@ > 0.8\dimen@ii % due to long index text
+        \dimen@ = 0.7\dimen@ % Try to split the text roughly evenly
+        \dimen@ii = \hsize
+        % (\ifnum coerces both dimens to integers, i.e. their values in
+        % sp, so this is a plain comparison of the two lengths.)
+        \ifnum\dimen@>\dimen@ii
+          % If the entry is too long, use the whole line
+          \dimen@ = \dimen@ii
+        \fi
+        \advance\leftskip by 0pt plus 1fill % ragged right
+        \advance \dimen@ by 1\rightskip
+        \parshape = 2 0pt \dimen@ 0em \dimen@ii
+        % Ideally we'd add a finite glue at the end of the first line only,
+        % instead of using \parshape with explicit line lengths, but TeX
+        % doesn't seem to provide a way to do such a thing.
+        %
+        \leftskip = 1em
+        \parindent = -1em
+      \fi\fi
+      \indent % start paragraph
+      \unhbox\boxA
+      %
+      % Do not prefer a separate line ending with a hyphen to fewer lines.
+      \finalhyphendemerits = 0
+      %
+      % Word spacing - no stretch
+      \spaceskip=\fontdimen2\font minus \fontdimen4\font
+      %
+      \linepenalty=1000 % Discourage line breaks.
+      \hyphenpenalty=5000 % Discourage hyphenation.
+      %
+      \par % format the paragraph
+    \egroup % The \vbox
+    \fi
+  \endgroup
+  % delay text of entry until after penalty
+  \bgroup\aftergroup\insertindexentrybox
+  \entrywidowpenalty
+}}
+
+% Glue placed in front of the page number(s) by \finishentry.
+\newskip\thinshrinkable
+\skip\thinshrinkable=.15em minus .15em
+
+% \entryindexbox holds the finished entry built by \finishentry;
+% \insertindexentrybox unpacks it with \ourunvbox.
+\newbox\entryindexbox
+\def\insertindexentrybox{%
+  \ourunvbox\entryindexbox
+}
+
+% Use \lastbox to take apart vbox box by box, and add each sub-box
+% to the current vertical list.
+\def\ourunvbox#1{% +\bgroup % for local binding of \delayedbox + % Remove the last box from box #1 + \global\setbox#1=\vbox{% + \unvbox#1% + \unskip % remove any glue + \unpenalty + \global\setbox\interbox=\lastbox + }% + \setbox\delayedbox=\box\interbox + \ifdim\ht#1=0pt\else + \ourunvbox#1 % Repeat on what's left of the box + \nobreak + \fi + \box\delayedbox +\egroup +} +\newbox\delayedbox +\newbox\interbox + +% Default is no penalty: \entrywidowpenalty only closes the group that +% \finishentry opens just before calling it. +\let\entrywidowpenalty\egroup + +% Used from \printindex. \firsttoken should be the first token +% after the \entry. If it's not another \entry, we are at the last +% line of a group of index entries, so insert a penalty to discourage +% widowed index entries. +\long\def\indexwidowpenalty{% + \def\isentry{\entry}% + \ifx\firsttoken\isentry + \else + \penalty 9000 + \fi + \egroup % now comes the box added with \aftergroup +} + +% Like plain.tex's \dotfill, except uses up at least 1 em. +% The filll stretch here overpowers both the fil and fill stretch to push +% the page number to the right. +\def\indexdotfill{\cleaders + \hbox{$\mathsurround=0pt \mkern1.5mu.\mkern1.5mu$}\hskip 1em plus 1filll} + + +% \primary typesets #1 at the left of a \line, filled out with \hfil. +\def\primary #1{\line{#1\hfil}} + +% \secondary typesets #1 indented by \secondaryindent, followed by dot +% leaders and the page number(s) #2. +\newskip\secondaryindent \secondaryindent=0.5cm +\def\secondary#1#2{{% + \parfillskip=0in + \parskip=0in + \hangindent=1in + \hangafter=1 + \noindent\hskip\secondaryindent\hbox{#1}\indexdotfill + \ifpdf + \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. + \else + \ifx\XeTeXrevision\thisisundefined + #2 + \else + \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. + \fi + \fi + \par +}} + +% Define two-column mode, which we use to typeset indexes. +% Adapted from the TeXbook, page 416, which is to say, +% the manmac.tex format used to print the TeXbook itself.
+\catcode`\@=11 % private names + +\newbox\partialpage +\newdimen\doublecolumnhsize + +% Use inside an output routine to save \topmark and \firstmark +\def\savemarks{% + \global\savedtopmark=\expandafter{\topmark }% + \global\savedfirstmark=\expandafter{\firstmark }% +} +\newtoks\savedtopmark +\newtoks\savedfirstmark + +% Set \topmark and \firstmark for next time \output runs. +% Can't be run from within \output (because any material +% added while an output routine is active, including +% penalties, is saved for after it finishes). The page so far +% should be empty, otherwise what's on it will be thrown away. +\def\restoremarks{% + \mark{\the\savedtopmark}% + \bgroup\output = {% + \setbox\dummybox=\box\PAGE + }abc\eject\egroup + % "abc" because output routine doesn't fire for a completely empty page. + \mark{\the\savedfirstmark}% +} + +\def\begindoublecolumns{\begingroup % ended by \enddoublecolumns + % If not much space left on page, start a new page. + \ifdim\pagetotal>0.8\vsize\vfill\eject\fi + % + % Grab any single-column material above us. + \output = {% + % + % Here is a possibility not foreseen in manmac: if we accumulate a + % whole lot of material, we might end up calling this \output + % routine twice in a row (see the doublecol-lose test, which is + % essentially a couple of indexes with @setchapternewpage off). In + % that case we just ship out what is in \partialpage with the normal + % output routine. Generally, \partialpage will be empty when this + % runs and this will be a no-op. See the indexspread.tex test case. + \ifvoid\partialpage \else + \onepageout{\pagecontents\partialpage}% + \fi + % + \global\setbox\partialpage = \vbox{% + % Unvbox the main output page.
+ \unvbox\PAGE + \kern-\topskip \kern\baselineskip + }% + \savemarks + }% + \eject % run that output routine to set \partialpage + \restoremarks + % + % We recover the two marks that the last output routine saved in order + % to propagate the information in marks added around a chapter heading, + % which could otherwise be lost by the time the final page is output. + % + % + % Use the double-column output routine for subsequent pages. + \output = {\doublecolumnout}% + % + % Change the page size parameters. We could do this once outside this + % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 + % format, but then we repeat the same computation. Repeating a couple + % of assignments once per index is clearly negligible for the + % execution time, so we may as well do it in one place. + % + % First we halve the line length, less a little for the gutter between + % the columns. We compute the gutter based on the line length, so it + % changes automatically with the paper format. The magic constant + % below is chosen so that the gutter has the same value (well, +-<1pt) + % as it did when we hard-coded it. + % + % We put the result in a separate register, \doublecolumnhsize, so we + % can restore it in \pagesofar, after \hsize itself has (potentially) + % been clobbered. + % + \doublecolumnhsize = \hsize + \advance\doublecolumnhsize by -.04154\hsize + \divide\doublecolumnhsize by 2 + \hsize = \doublecolumnhsize + % + % Double the \vsize as well. (We don't need a separate register here, + % since nobody clobbers \vsize.) + \vsize = 2\vsize + % + % For the benefit of balancing columns + \advance\baselineskip by 0pt plus 0.5pt +} + +% The double-column output routine for all double-column pages except +% the last, which is done by \balancecolumns.
+% +\def\doublecolumnout{% + % + \splittopskip=\topskip \splitmaxdepth=\maxdepth + % Get the available space for the double columns -- the normal + % (undoubled) page height minus any material left over from the + % previous page. + \dimen@ = \vsize + \divide\dimen@ by 2 + \advance\dimen@ by -\ht\partialpage + % + % box0 will be the left-hand column, box2 the right. + \setbox0=\vsplit255 to\dimen@ \setbox2=\vsplit255 to\dimen@ + \onepageout\pagesofar + \unvbox255 + \penalty\outputpenalty +} +% +% Re-output the contents of the output page -- any previous material, +% followed by the two boxes we just split, in box0 and box2. +\def\pagesofar{% + \unvbox\partialpage + % + \hsize = \doublecolumnhsize + \wd0=\hsize \wd2=\hsize + \hbox to\txipagewidth{\box0\hfil\box2}% +} + + +% Finished with double columns. +\def\enddoublecolumns{% + % The following penalty ensures that the page builder is exercised + % _before_ we change the output routine. This is necessary in the + % following situation: + % + % The last section of the index consists only of a single entry. + % Before this section, \pagetotal is less than \pagegoal, so no + % break occurs before the last section starts. However, the last + % section, consisting of \initial and the single \entry, does not + % fit on the page and has to be broken off. Without the following + % penalty the page builder will not be exercised until \eject + % below, and by that time we'll already have changed the output + % routine to the \balancecolumns version, so the next-to-last + % double-column page will be processed with \balancecolumns, which + % is wrong: The two columns will go to the main vertical list, with + % the broken-off section in the recent contributions. As soon as + % the output routine finishes, TeX starts reconsidering the page + % break. The two columns and the broken-off section both fit on the + % page, because the two columns now take up only half of the page + % goal.
When TeX sees \eject from below which follows the final + % section, it invokes the new output routine that we've set after + % \balancecolumns below; \onepageout will try to fit the two columns + % and the final section into the vbox of \txipageheight (see + % \pagebody), causing an overfull box. + % + % Note that glue won't work here, because glue does not exercise the + % page builder, unlike penalties (see The TeXbook, pp. 280-281). + \penalty0 + % + \output = {% + % Split the last of the double-column material. + \savemarks + \balancecolumns + % + % Having called \balancecolumns once, we do not + % want to call it again. Therefore, reset \output to its normal + % definition right away. + \global\output = {\onepageout{\pagecontents\PAGE}}% + }% + \eject + \endgroup % started in \begindoublecolumns + \restoremarks + % Leave the double-column material on the current page, no automatic + % page break. + \box\balancedcolumns + % + % \pagegoal was set to the doubled \vsize above, since we restarted + % the current page. We're now back to normal single-column + % typesetting, so reset \pagegoal to the normal \vsize (after the + % \endgroup where \vsize got restored). + \pagegoal = \vsize +} +\newbox\balancedcolumns +\setbox\balancedcolumns=\vbox{shouldnt see this}% placeholder until \balancecolumns sets the box +% +% Only called for the last of the double column material. \doublecolumnout +% does the others. +\def\balancecolumns{% + \setbox0 = \vbox{\unvbox255}% like \box255 but more efficient, see p.120. + \dimen@ = \ht0 + \advance\dimen@ by \topskip + \advance\dimen@ by-\baselineskip + \ifdim\dimen@<5\baselineskip + % Don't split a short final column in two. + \setbox2=\vbox{}% + \else + \divide\dimen@ by 2 % target to split to + \dimen@ii = \dimen@ + \splittopskip = \topskip + % Loop until left column is at least as high as the right column.
+ {% + \vbadness = 10000 + \loop + \global\setbox3 = \copy0 + \global\setbox1 = \vsplit3 to \dimen@ + \ifdim\ht1<\ht3 + \global\advance\dimen@ by 1pt + \repeat + }% + % Now the left column is in box 1, and the right column in box 3. + % Check whether the left column has come out higher than the page itself. + % (Note that we have doubled \vsize for the double columns, so + % the actual height of the page is 0.5\vsize). + \ifdim2\ht1>\vsize + % Just split the last of the double column material roughly in half. + \setbox2=\box0 + \setbox0 = \vsplit2 to \dimen@ii + \setbox0=\vbox to \dimen@ii {\unvbox0\vfill}% + \setbox2=\vbox to \dimen@ii {\unvbox2\vfill}% + \else + % Compare the heights of the two columns: is the left one more than + % 25% taller than the right one? + \ifdim4\ht1>5\ht3 + % Column heights are too different, so don't make their bottoms + % flush with each other. + \setbox2=\vbox to \ht1 {\unvbox3\vfill}% + \setbox0=\vbox to \ht1 {\unvbox1\vfill}% + \else + % Make column bottoms flush with each other. + \setbox2=\vbox to\ht1{\unvbox3\unskip}% + \setbox0=\vbox to\ht1{\unvbox1\unskip}% + \fi + \fi + \fi + % + \global\setbox\balancedcolumns=\vbox{\pagesofar}% +} +\catcode`\@ = \other + + +\message{sectioning,} +% Chapters, sections, etc. + +% Let's start with @part. +\outer\parseargdef\part{\partzzz{#1}} +\def\partzzz#1{% + \chapoddpage + \null + \vskip.3\vsize % move it down on the page a bit + \begingroup + \noindent \titlefonts\rm #1\par % the text + \let\lastnode=\empty % no node to associate with + \writetocentry{part}{#1}{}% but put it in the toc + \headingsoff % no headline or footline on the part page + % This outputs a mark at the end of the page that clears \thischapter + % and \thissection, as is done in \startcontents. + \let\pchapsepmacro\relax + \chapmacro{}{Yomitfromtoc}{}% + \chapoddpage + \endgroup +} + +% \unnumberedno is an oxymoron. But we count the unnumbered +% sections so that we can refer to them unambiguously in the pdf +% outlines by their "section number".
We avoid collisions with chapter +% numbers by starting them at 10000. (If a document ever has 10000 +% chapters, we're in trouble anyway, I'm sure.) +\newcount\unnumberedno \unnumberedno = 10000 +\newcount\chapno +\newcount\secno \secno=0 +\newcount\subsecno \subsecno=0 +\newcount\subsubsecno \subsubsecno=0 + +% This counter is funny since it counts through charcodes of letters A, B, ... +% It starts at `\@, the character code just before `A; \appendixzzz advances +% it by 1 before using it, so the first appendix gets the letter A. +\newcount\appendixno \appendixno = `\@ +% +% \def\appendixletter{\char\the\appendixno} +% We do the following ugly conditional instead of the above simple +% construct for the sake of pdftex, which needs the actual +% letter in the expansion, not just typeset. +% +\def\appendixletter{% + \ifnum\appendixno=`A A% + \else\ifnum\appendixno=`B B% + \else\ifnum\appendixno=`C C% + \else\ifnum\appendixno=`D D% + \else\ifnum\appendixno=`E E% + \else\ifnum\appendixno=`F F% + \else\ifnum\appendixno=`G G% + \else\ifnum\appendixno=`H H% + \else\ifnum\appendixno=`I I% + \else\ifnum\appendixno=`J J% + \else\ifnum\appendixno=`K K% + \else\ifnum\appendixno=`L L% + \else\ifnum\appendixno=`M M% + \else\ifnum\appendixno=`N N% + \else\ifnum\appendixno=`O O% + \else\ifnum\appendixno=`P P% + \else\ifnum\appendixno=`Q Q% + \else\ifnum\appendixno=`R R% + \else\ifnum\appendixno=`S S% + \else\ifnum\appendixno=`T T% + \else\ifnum\appendixno=`U U% + \else\ifnum\appendixno=`V V% + \else\ifnum\appendixno=`W W% + \else\ifnum\appendixno=`X X% + \else\ifnum\appendixno=`Y Y% + \else\ifnum\appendixno=`Z Z% + % The \the is necessary, despite appearances, because \appendixletter is + % expanded while writing the .toc file. \char\appendixno is not + % expandable, thus it is written literally, thus all appendixes come out + % with the same letter (or @) in the toc without it. + \else\char\the\appendixno + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi} + +% Each @chapter defines these (using marks) as the number+name, number +% and name of the chapter.
Page headings and footings can use +% these. @section does likewise. +\def\thischapter{} +\def\thischapternum{} +\def\thischaptername{} +\def\thissection{} +\def\thissectionnum{} +\def\thissectionname{} + +\newcount\absseclevel % used to calculate proper heading level +\newcount\secbase\secbase=0 % @raisesections/@lowersections modify this count + +% @raisesections: treat @section as chapter, @subsection as section, etc. +\def\raisesections{\global\advance\secbase by -1} +\let\up=\raisesections % original BFox name + +% @lowersections: treat @chapter as section, @section as subsection, etc. +\def\lowersections{\global\advance\secbase by 1} +\let\down=\lowersections % original BFox name + +% we only have subsub. +\chardef\maxseclevel = 3 +% +% A numbered section within an unnumbered changes to unnumbered too. +% To achieve this, remember the "biggest" unnum. sec. we are currently in: +\chardef\unnlevel = \maxseclevel +% +% Trace whether the current chapter is an appendix or not: +% \chapheadtype is "N" or "A", unnumbered chapters are ignored. +\def\chapheadtype{N} + +% Choose a heading macro +% #1 is heading type (N = numbered, A = appendix, U = unnumbered) +% #2 is heading level (0 = chapter ... 3 = subsubsection), before +% \secbase is added +% #3 is text for heading +\def\genhead#1#2#3{% + % Compute the abs. sec. level: + \absseclevel=#2 + \advance\absseclevel by \secbase + % Make sure \absseclevel doesn't fall outside the range 0..3 + % (3 is the value of \maxseclevel): + \ifnum \absseclevel < 0 + \absseclevel = 0 + \else + \ifnum \absseclevel > 3 + \absseclevel = 3 + \fi + \fi + % The heading type: + \def\headtype{#1}% + \if \headtype U% + \ifnum \absseclevel < \unnlevel + \chardef\unnlevel = \absseclevel + \fi + \else + % Check for appendix sections: + \ifnum \absseclevel = 0 + \edef\chapheadtype{\headtype}% + \else + \if \headtype A\if \chapheadtype N% + \errmessage{@appendix...
within a non-appendix chapter}% + \fi\fi + \fi + % Check for numbered within unnumbered: + \ifnum \absseclevel > \unnlevel + \def\headtype{U}% + \else + \chardef\unnlevel = 3 + \fi + \fi + % Now print the heading: + \if \headtype U% + \ifcase\absseclevel + \unnumberedzzz{#3}% + \or \unnumberedseczzz{#3}% + \or \unnumberedsubseczzz{#3}% + \or \unnumberedsubsubseczzz{#3}% + \fi + \else + \if \headtype A% + \ifcase\absseclevel + \appendixzzz{#3}% + \or \appendixsectionzzz{#3}% + \or \appendixsubseczzz{#3}% + \or \appendixsubsubseczzz{#3}% + \fi + \else + \ifcase\absseclevel + \chapterzzz{#3}% + \or \seczzz{#3}% + \or \numberedsubseczzz{#3}% + \or \numberedsubsubseczzz{#3}% + \fi + \fi + \fi + \suppressfirstparagraphindent +} + +% an interface: the heading type is preset; level and text follow. +\def\numhead{\genhead N} +\def\apphead{\genhead A} +\def\unnmhead{\genhead U} + +% @chapter, @appendix, @unnumbered. Increment top-level counter, reset +% all lower-level sectioning counters to zero. +% +% Also set \chaplevelprefix, which we prepend to @float sequence numbers +% (e.g., figures), q.v. By default (before any chapter), that is empty. +\let\chaplevelprefix = \empty +% +\outer\parseargdef\chapter{\numhead0{#1}} % normally numhead0 calls chapterzzz +\def\chapterzzz#1{% + % section resetting is \global in case the chapter is in a group, such + % as an @include file. + \global\secno=0 \global\subsecno=0 \global\subsubsecno=0 + \global\advance\chapno by 1 + % + % Used for \float. + \gdef\chaplevelprefix{\the\chapno.}% + \resetallfloatnos + % + % \putwordChapter can contain complex things in translations. + \toks0=\expandafter{\putwordChapter}% + \message{\the\toks0 \space \the\chapno}% + % + % Write the actual heading. + \chapmacro{#1}{Ynumbered}{\the\chapno}% + % + % So @section and the like are numbered underneath this chapter.
+ \global\let\section = \numberedsec + \global\let\subsection = \numberedsubsec + \global\let\subsubsection = \numberedsubsubsec +} + +\outer\parseargdef\appendix{\apphead0{#1}} % normally calls appendixzzz +% +\def\appendixzzz#1{% + \global\secno=0 \global\subsecno=0 \global\subsubsecno=0 + \global\advance\appendixno by 1 + \gdef\chaplevelprefix{\appendixletter.}% + \resetallfloatnos + % + % \putwordAppendix can contain complex things in translations. + \toks0=\expandafter{\putwordAppendix}% + \message{\the\toks0 \space \appendixletter}% + % + \chapmacro{#1}{Yappendix}{\appendixletter}% + % + \global\let\section = \appendixsec + \global\let\subsection = \appendixsubsec + \global\let\subsubsection = \appendixsubsubsec +} + +% normally unnmhead0 calls unnumberedzzz: +\outer\parseargdef\unnumbered{\unnmhead0{#1}} +\def\unnumberedzzz#1{% + \global\secno=0 \global\subsecno=0 \global\subsubsecno=0 + \global\advance\unnumberedno by 1 + % + % Since an unnumbered has no number, no prefix for figures. + \global\let\chaplevelprefix = \empty + \resetallfloatnos + % + % This used to be simply \message{#1}, but TeX fully expands the + % argument to \message. Therefore, if #1 contained @-commands, TeX + % expanded them. For example, in `@unnumbered The @cite{Book}', TeX + % expanded @cite (which turns out to cause errors because \cite is meant + % to be executed, not expanded). + % + % Anyway, we don't want the fully-expanded definition of @cite to appear + % as a result of the \message, we just want `@cite' itself. We use + % \the<toks register> to achieve this: TeX expands \the<toks> only once, + % simply yielding the contents of <toks register>. (We also do this for + % the toc entries.)
+ \toks0 = {#1}% + \message{(\the\toks0)}% + % + \chapmacro{#1}{Ynothing}{\the\unnumberedno}% + % + \global\let\section = \unnumberedsec + \global\let\subsection = \unnumberedsubsec + \global\let\subsubsection = \unnumberedsubsubsec +} + +% @centerchap is like @unnumbered, but the heading is centered. +\outer\parseargdef\centerchap{% + \let\centerparametersmaybe = \centerparameters + \unnmhead0{#1}% + \let\centerparametersmaybe = \relax +} + +% @top is like @unnumbered. +\let\top\unnumbered + +% Sections. +% +\outer\parseargdef\numberedsec{\numhead1{#1}} % normally calls seczzz +\def\seczzz#1{% + \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1 + \sectionheading{#1}{sec}{Ynumbered}{\the\chapno.\the\secno}% +} + +% normally calls appendixsectionzzz: +\outer\parseargdef\appendixsection{\apphead1{#1}} +\def\appendixsectionzzz#1{% + \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1 + \sectionheading{#1}{sec}{Yappendix}{\appendixletter.\the\secno}% +} +\let\appendixsec\appendixsection + +% normally calls unnumberedseczzz: +\outer\parseargdef\unnumberedsec{\unnmhead1{#1}} +\def\unnumberedseczzz#1{% + \global\subsecno=0 \global\subsubsecno=0 \global\advance\secno by 1 + \sectionheading{#1}{sec}{Ynothing}{\the\unnumberedno.\the\secno}% +} + +% Subsections.
+% +% normally calls numberedsubseczzz: +\outer\parseargdef\numberedsubsec{\numhead2{#1}} +\def\numberedsubseczzz#1{% + \global\subsubsecno=0 \global\advance\subsecno by 1 + \sectionheading{#1}{subsec}{Ynumbered}{\the\chapno.\the\secno.\the\subsecno}% +} + +% normally calls appendixsubseczzz: +\outer\parseargdef\appendixsubsec{\apphead2{#1}} +\def\appendixsubseczzz#1{% + \global\subsubsecno=0 \global\advance\subsecno by 1 + \sectionheading{#1}{subsec}{Yappendix}% + {\appendixletter.\the\secno.\the\subsecno}% +} + +% normally calls unnumberedsubseczzz: +\outer\parseargdef\unnumberedsubsec{\unnmhead2{#1}} +\def\unnumberedsubseczzz#1{% + \global\subsubsecno=0 \global\advance\subsecno by 1 + \sectionheading{#1}{subsec}{Ynothing}% + {\the\unnumberedno.\the\secno.\the\subsecno}% +} + +% Subsubsections. +% +% normally calls numberedsubsubseczzz: +\outer\parseargdef\numberedsubsubsec{\numhead3{#1}} +\def\numberedsubsubseczzz#1{% + \global\advance\subsubsecno by 1 + \sectionheading{#1}{subsubsec}{Ynumbered}% + {\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno}% +} + +% normally calls appendixsubsubseczzz: +\outer\parseargdef\appendixsubsubsec{\apphead3{#1}} +\def\appendixsubsubseczzz#1{% + \global\advance\subsubsecno by 1 + \sectionheading{#1}{subsubsec}{Yappendix}% + {\appendixletter.\the\secno.\the\subsecno.\the\subsubsecno}% +} + +% normally calls unnumberedsubsubseczzz: +\outer\parseargdef\unnumberedsubsubsec{\unnmhead3{#1}} +\def\unnumberedsubsubseczzz#1{% + \global\advance\subsubsecno by 1 + \sectionheading{#1}{subsubsec}{Ynothing}% + {\the\unnumberedno.\the\secno.\the\subsecno.\the\subsubsecno}% +} + +% These macros control what the section commands do, according +% to what kind of chapter we are in (ordinary, appendix, or unnumbered). +% Define them by default for a numbered chapter.
+\let\section = \numberedsec +\let\subsection = \numberedsubsec +\let\subsubsection = \numberedsubsubsec + +% Define @majorheading and @chapheading (@heading, @subheading and +% @subsubheading follow below). @majorheading differs from @chapheading +% only in adding 10pt to \chapheadingskip for its \chapbreak. + +\def\majorheading{% + {\advance\chapheadingskip by 10pt \chapbreak }% + \parsearg\chapheadingzzz +} + +\def\chapheading{\chapbreak \parsearg\chapheadingzzz} +\def\chapheadingzzz#1{% + \vbox{\chapfonts \raggedtitlesettings #1\par}% + \nobreak\bigskip \nobreak + \suppressfirstparagraphindent +} + +% @heading, @subheading, @subsubheading. +\parseargdef\heading{\sectionheading{#1}{sec}{Yomitfromtoc}{} + \suppressfirstparagraphindent} +\parseargdef\subheading{\sectionheading{#1}{subsec}{Yomitfromtoc}{} + \suppressfirstparagraphindent} +\parseargdef\subsubheading{\sectionheading{#1}{subsubsec}{Yomitfromtoc}{} + \suppressfirstparagraphindent} + +% These macros generate a chapter, section, etc. heading only +% (including whitespace, linebreaking, etc. around it), +% given all the information in convenient, parsed form. + +% Args are the skip and penalty (usually negative) +% If the last skip is already #1 or more, only the \par is done. +\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi} + +% Parameter controlling skip before chapter headings (if needed) +\newskip\chapheadingskip + +% Define plain chapter starts, and page on/off switching for it. +\def\chapbreak{\dobreak \chapheadingskip {-4000}} + +% Start a new page +\def\chappager{\par\vfill\supereject} + +% \chapoddpage - start on an odd page for a new chapter +% Because \domark is called before \chapoddpage, the filler page will +% get the headings for the next chapter, which is wrong. But we don't +% care -- we just disable all headings on the filler page.
+\def\chapoddpage{% + \chappager + \ifodd\pageno \else + \begingroup + \headingsoff + \null + \chappager + \endgroup + \fi +} + +% @setchapternewpage off|on|odd runs the matching \CHAPPAG... macro below. +\parseargdef\setchapternewpage{\csname CHAPPAG#1\endcsname} + +\def\CHAPPAGoff{% +\global\let\contentsalignmacro = \chappager +\global\let\pchapsepmacro=\chapbreak +\global\let\pagealignmacro=\chappager} + +\def\CHAPPAGon{% +\global\let\contentsalignmacro = \chappager +\global\let\pchapsepmacro=\chappager +\global\let\pagealignmacro=\chappager +\global\def\HEADINGSon{\HEADINGSsingle}} + +\def\CHAPPAGodd{% +\global\let\contentsalignmacro = \chapoddpage +\global\let\pchapsepmacro=\chapoddpage +\global\let\pagealignmacro=\chapoddpage +\global\def\HEADINGSon{\HEADINGSdouble}} + +% The default is the `on' setting. +\CHAPPAGon + +% \chapmacro - Chapter opening. +% +% #1 is the text, #2 is the section type (Ynumbered, Ynothing, +% Yappendix, Yomitfromtoc), #3 the chapter number. +% Not used for @heading series. +% +% To test against our argument. +\def\Ynothingkeyword{Ynothing} +\def\Yappendixkeyword{Yappendix} +\def\Yomitfromtockeyword{Yomitfromtoc} +% +\def\chapmacro#1#2#3{% + \expandafter\ifx\thisenv\titlepage\else + \checkenv{}% chapters, etc., should not start inside an environment. + \fi + % FIXME: \chapmacro is currently called from inside \titlepage when + % \setcontentsaftertitlepage to print the "Table of Contents" heading, but + % this should probably be done by \sectionheading with an option to print + % in chapter size. + % + % Insert the first mark before the heading break (see notes for \domark).
+ \let\prevchapterdefs=\lastchapterdefs + \let\prevsectiondefs=\lastsectiondefs + \gdef\lastsectiondefs{\gdef\thissectionname{}\gdef\thissectionnum{}% + \gdef\thissection{}}% + % + \def\temptype{#2}% + \ifx\temptype\Ynothingkeyword + \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}% + \gdef\thischapter{\thischaptername}}% + \else\ifx\temptype\Yomitfromtockeyword + \gdef\lastchapterdefs{\gdef\thischaptername{#1}\gdef\thischapternum{}% + \gdef\thischapter{}}% + \else\ifx\temptype\Yappendixkeyword + \toks0={#1}% + \xdef\lastchapterdefs{% + \gdef\noexpand\thischaptername{\the\toks0}% + \gdef\noexpand\thischapternum{\appendixletter}% + % \noexpand\putwordAppendix avoids expanding indigestible + % commands in some of the translations. + \gdef\noexpand\thischapter{\noexpand\putwordAppendix{} + \noexpand\thischapternum: + \noexpand\thischaptername}% + }% + \else + \toks0={#1}% + \xdef\lastchapterdefs{% + \gdef\noexpand\thischaptername{\the\toks0}% + \gdef\noexpand\thischapternum{\the\chapno}% + % \noexpand\putwordChapter avoids expanding indigestible + % commands in some of the translations. + \gdef\noexpand\thischapter{\noexpand\putwordChapter{} + \noexpand\thischapternum: + \noexpand\thischaptername}% + }% + \fi\fi\fi + % + % Output the mark. Pass it through \safewhatsit, to take care of + % the preceding space. + \safewhatsit\domark + % + % Insert the chapter heading break. + \pchapsepmacro + % + % Now the second mark, after the heading break. No break points + % between here and the heading. + \let\prevchapterdefs=\lastchapterdefs + \let\prevsectiondefs=\lastsectiondefs + \domark + % + {% + \chapfonts \rm + \let\footnote=\errfootnoteheading % give better error message + % + % Have to define \lastsection before calling \donoderef, because the + % xref code eventually uses it. On the other hand, it has to be called + % after \pchapsepmacro, or the headline will change too soon.
+ \gdef\lastsection{#1}% + % + % Only insert the separating space if we have a chapter/appendix + % number, and don't print the unnumbered ``number''. + \ifx\temptype\Ynothingkeyword + \setbox0 = \hbox{}% + \def\toctype{unnchap}% + \else\ifx\temptype\Yomitfromtockeyword + \setbox0 = \hbox{}% contents like unnumbered, but no toc entry + \def\toctype{omit}% + \else\ifx\temptype\Yappendixkeyword + \setbox0 = \hbox{\putwordAppendix{} #3\enspace}% + \def\toctype{app}% + \else + \setbox0 = \hbox{#3\enspace}% + \def\toctype{numchap}% + \fi\fi\fi + % + % Write the toc entry for this chapter. Must come before the + % \donoderef, because we include the current node name in the toc + % entry, and \donoderef resets it to empty. + \writetocentry{\toctype}{#1}{#3}% + % + % For pdftex, we have to write out the node definition (aka, make + % the pdfdest) after any page break, but before the actual text has + % been typeset. If the destination for the pdf outline is after the + % text, then jumping from the outline may wind up with the text not + % being visible, for instance under high magnification. + \donoderef{#2}% + % + % Typeset the actual heading. + \nobreak % Avoid page breaks at the interline glue. + \vbox{\raggedtitlesettings \hangindent=\wd0 \centerparametersmaybe + \unhbox0 #1\par}% + }% + \nobreak\bigskip % no page break after a chapter title + \nobreak +} + +% @centerchap -- centered and unnumbered. +% \centerparametersmaybe is \relax except while \centerchap runs, when it +% is \centerparameters. +\let\centerparametersmaybe = \relax +\def\centerparameters{% + \advance\rightskip by 3\rightskip + \leftskip = \rightskip + \parfillskip = 0pt +} + + +% Section titles. These macros combine the section number parts and +% call the generic \sectionheading to do the printing. +% +\newskip\secheadingskip +\def\secheadingbreak{\dobreak \secheadingskip{-1000}} + +% Subsection titles. +\newskip\subsecheadingskip +\def\subsecheadingbreak{\dobreak \subsecheadingskip{-500}} + +% Subsubsection titles.
+% Subsubsection headings reuse the subsection skip and break.
+\def\subsubsecheadingskip{\subsecheadingskip}
+\def\subsubsecheadingbreak{\subsecheadingbreak}
+
+
+% Print any size, any type, section title.
+%
+% #1 is the text of the title,
+% #2 is the section level (sec/subsec/subsubsec),
+% #3 is the section type (Ynumbered, Ynothing, Yappendix, Yomitfromtoc),
+% #4 is the section number.
+%
+\def\seckeyword{sec}
+%
+\def\sectionheading#1#2#3#4{%
+  {%
+    \def\sectionlevel{#2}%
+    \def\temptype{#3}%
+    %
+    % It is ok for the @heading series commands to appear inside an
+    % environment (it's been historically allowed, though the logic is
+    % dubious), but not the others.
+    \ifx\temptype\Yomitfromtockeyword\else
+      \checkenv{}% non-@*heading should not be in an environment.
+    \fi
+    \let\footnote=\errfootnoteheading
+    %
+    % Switch to the right set of fonts.
+    \csname #2fonts\endcsname \rm
+    %
+    % Insert first mark before the heading break (see notes for \domark).
+    % Only level `sec' headings redefine \lastsectiondefs; deeper levels
+    % leave the running section name untouched.
+    \let\prevsectiondefs=\lastsectiondefs
+    \ifx\temptype\Ynothingkeyword
+      \ifx\sectionlevel\seckeyword
+        \gdef\lastsectiondefs{\gdef\thissectionname{#1}\gdef\thissectionnum{}%
+                              \gdef\thissection{\thissectionname}}%
+      \fi
+    \else\ifx\temptype\Yomitfromtockeyword
+      % Don't redefine \thissection.
+    \else\ifx\temptype\Yappendixkeyword
+      \ifx\sectionlevel\seckeyword
+        \toks0={#1}%
+        \xdef\lastsectiondefs{%
+          \gdef\noexpand\thissectionname{\the\toks0}%
+          \gdef\noexpand\thissectionnum{#4}%
+          % \noexpand\putwordSection avoids expanding indigestible
+          % commands in some of the translations.
+          \gdef\noexpand\thissection{\noexpand\putwordSection{}
+                                     \noexpand\thissectionnum:
+                                     \noexpand\thissectionname}%
+        }%
+      \fi
+    \else
+      % Numbered section: same definitions as the appendix case above.
+      \ifx\sectionlevel\seckeyword
+        \toks0={#1}%
+        \xdef\lastsectiondefs{%
+          \gdef\noexpand\thissectionname{\the\toks0}%
+          \gdef\noexpand\thissectionnum{#4}%
+          % \noexpand\putwordSection avoids expanding indigestible
+          % commands in some of the translations.
+          \gdef\noexpand\thissection{\noexpand\putwordSection{}
+                                     \noexpand\thissectionnum:
+                                     \noexpand\thissectionname}%
+        }%
+      \fi
+    \fi\fi\fi
+    %
+    % Go into vertical mode.  Usually we'll already be there, but we
+    % don't want the following whatsit to end up in a preceding paragraph
+    % if the document didn't happen to have a blank line.
+    \par
+    %
+    % Output the mark.  Pass it through \safewhatsit, to take care of
+    % the preceding space.
+    \safewhatsit\domark
+    %
+    % Insert space above the heading.
+    \csname #2headingbreak\endcsname
+    %
+    % Now the second mark, after the heading break.  No break points
+    % between here and the heading.
+    \global\let\prevsectiondefs=\lastsectiondefs
+    \domark
+    %
+    % Only insert the space after the number if we have a section number.
+    % Box 0 holds the number (if any); its width is the hanging indent below.
+    \ifx\temptype\Ynothingkeyword
+      \setbox0 = \hbox{}%
+      \def\toctype{unn}%
+      \gdef\lastsection{#1}%
+    \else\ifx\temptype\Yomitfromtockeyword
+      % for @headings -- no section number, don't include in toc,
+      % and don't redefine \lastsection.
+      \setbox0 = \hbox{}%
+      \def\toctype{omit}%
+      \let\sectionlevel=\empty
+    \else\ifx\temptype\Yappendixkeyword
+      \setbox0 = \hbox{#4\enspace}%
+      \def\toctype{app}%
+      \gdef\lastsection{#1}%
+    \else
+      \setbox0 = \hbox{#4\enspace}%
+      \def\toctype{num}%
+      \gdef\lastsection{#1}%
+    \fi\fi\fi
+    %
+    % Write the toc entry (before \donoderef).  See comments in \chapmacro.
+    \writetocentry{\toctype\sectionlevel}{#1}{#4}%
+    %
+    % Write the node reference (= pdf destination for pdftex).
+    % Again, see comments in \chapmacro.
+    \donoderef{#3}%
+    %
+    % Interline glue will be inserted when the vbox is completed.
+    % That glue will be a valid breakpoint for the page, since it'll be
+    % preceded by a whatsit (usually from the \donoderef, or from the
+    % \writetocentry if there was no node).  We don't want to allow that
+    % break, since then the whatsits could end up on page n while the
+    % section is on page n+1, thus toc/etc. are wrong.  Debian bug 276000.
+    \nobreak
+    %
+    % Output the actual section heading.
+    \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \ptexraggedright
+          \hangindent=\wd0  % zero if no section number
+          \unhbox0 #1}%
+  }%
+  % Add extra space after the heading -- half of whatever came above it.
+  % Don't allow stretch, though.
+  \kern .5 \csname #2headingskip\endcsname
+  %
+  % Do not let the kern be a potential breakpoint, as it would be if it
+  % was followed by glue.
+  \nobreak
+  %
+  % We'll almost certainly start a paragraph next, so don't let that
+  % glue accumulate.  (Not a breakpoint because it's preceded by a
+  % discardable item.)  However, when a paragraph is not started next
+  % (\startdefun, \cartouche, \center, etc.), this needs to be wiped out
+  % or the negative glue will cause weirdly wrong output, typically
+  % obscuring the section heading with something else.
+  \vskip-\parskip
+  %
+  % This is so the last item on the main vertical list is a known
+  % \penalty > 10000, so \startdefun, etc., can recognize the situation
+  % and do the needful.
+  \penalty 10001
+}
+
+
+\message{toc,}
+% Table of contents.
+\newwrite\tocfile
+
+% Write an entry to the toc file, opening it if necessary.
+% Called from @chapter, etc.
+%
+% Example usage: \writetocentry{sec}{Section Name}{\the\chapno.\the\secno}
+% We append the current node name (if any) and page number as additional
+% arguments for the \{chap,sec,...}entry macros which will eventually
+% read this.  The node name is used in the pdf outlines as the
+% destination to jump to.
+%
+% We open the .toc file for writing here instead of at @setfilename (or
+% any other fixed time) so that @contents can be anywhere in the document.
+% But if #1 is `omit', then we don't do anything.  This is used for the
+% table of contents chapter openings themselves.
+%
+\newif\iftocfileopened
+\def\omitkeyword{omit}%
+%
+% #1 is the entry type (e.g. `numchap', `unnsec', or `omit'),
+% #2 the title text, #3 the chapter/section number.
+\def\writetocentry#1#2#3{%
+  \edef\writetoctype{#1}%
+  \ifx\writetoctype\omitkeyword \else
+    \iftocfileopened\else
+      \immediate\openout\tocfile = \jobname.toc
+      \global\tocfileopenedtrue
+    \fi
+    %
+    \iflinks
+      {\atdummies
+       \edef\temp{%
+         \write\tocfile{@#1entry{#2}{#3}{\lastnode}{\noexpand\folio}}}%
+       \temp
+      }%
+    \fi
+  \fi
+  %
+  % Tell \shipout to create a pdf destination on each page, if we're
+  % writing pdf.  These are used in the table of contents.  We can't
+  % just write one on every page because the title pages are numbered
+  % 1 and 2 (the page numbers aren't printed), and so are the first
+  % two pages of the document.  Thus, we'd have two destinations named
+  % `1', and two named `2'.
+  \ifpdf
+    \global\pdfmakepagedesttrue
+  \else
+    \ifx\XeTeXrevision\thisisundefined
+    \else
+      \global\pdfmakepagedesttrue
+    \fi
+  \fi
+}
+
+
+% These characters do not print properly in the Computer Modern roman
+% fonts, so we must take special care.  This is more or less redundant
+% with the Texinfo input format setup at the end of this file.
+%
+\def\activecatcodes{%
+  \catcode`\"=\active
+  \catcode`\$=\active
+  \catcode`\<=\active
+  \catcode`\>=\active
+  \catcode`\\=\active
+  \catcode`\^=\active
+  \catcode`\_=\active
+  \catcode`\|=\active
+  \catcode`\~=\active
+}
+
+
+% Read the toc file, which is essentially Texinfo input.
+\def\readtocfile{%
+  \setupdatafile
+  \activecatcodes
+  \input \tocreadfilename
+}
+
+\newskip\contentsrightmargin \contentsrightmargin=1in
+\newcount\savepageno
+\newcount\lastnegativepageno \lastnegativepageno = -1
+
+% Prepare to read what we've written to \tocfile.
+% #1 is the heading text for the contents chapter.  This opens a group
+% that the caller (\contents or \summarycontents) closes with \endgroup.
+%
+\def\startcontents#1{%
+  % If @setchapternewpage on, and @headings double, the contents should
+  % start on an odd page, unlike chapters.  Thus, we maintain
+  % \contentsalignmacro in parallel with \pagealignmacro.
+  % From: Torbjorn Granlund <tege@matematik.su.se>
+  \contentsalignmacro
+  \immediate\closeout\tocfile
+  %
+  % Don't need to put `Contents' or `Short Contents' in the headline.
+  % It is abundantly clear what they are.
+  \chapmacro{#1}{Yomitfromtoc}{}%
+  %
+  \savepageno = \pageno
+  \begingroup                  % Set up to handle contents files properly.
+    \raggedbottom              % Worry more about breakpoints than the bottom.
+    \entryrightmargin=\contentsrightmargin % Don't use the full line length.
+    %
+    % Roman numerals for page numbers.
+    \ifnum \pageno>0 \global\pageno = \lastnegativepageno \fi
+}
+
+% redefined for the two-volume lispref.  We always output on
+% \jobname.toc even if this is redefined.
+%
+\def\tocreadfilename{\jobname.toc}
+
+% Normal (long) toc.
+%
+\def\contents{%
+  \startcontents{\putwordTOC}%
+    \openin 1 \tocreadfilename\space
+    \ifeof 1 \else
+      \readtocfile
+    \fi
+    \vfill \eject
+    \contentsalignmacro % in case @setchapternewpage odd is in effect
+    \ifeof 1 \else
+      \pdfmakeoutlines
+    \fi
+    \closein 1
+  \endgroup
+  \lastnegativepageno = \pageno
+  \global\pageno = \savepageno
+}
+
+% And just the chapters.
+\def\summarycontents{%
+  \startcontents{\putwordShortTOC}%
+    %
+    \let\partentry = \shortpartentry
+    \let\numchapentry = \shortchapentry
+    \let\appentry = \shortchapentry
+    \let\unnchapentry = \shortunnchapentry
+    % We want a true roman here for the page numbers.
+    \secfonts
+    \let\rm=\shortcontrm \let\bf=\shortcontbf
+    \let\sl=\shortcontsl \let\tt=\shortconttt
+    \rm
+    \hyphenpenalty = 10000
+    \advance\baselineskip by 1pt % Open it up a little.
+    % Every entry below chapter level expands to nothing in the short toc.
+    \def\numsecentry##1##2##3##4{}
+    \let\appsecentry = \numsecentry
+    \let\unnsecentry = \numsecentry
+    \let\numsubsecentry = \numsecentry
+    \let\appsubsecentry = \numsecentry
+    \let\unnsubsecentry = \numsecentry
+    \let\numsubsubsecentry = \numsecentry
+    \let\appsubsubsecentry = \numsecentry
+    \let\unnsubsubsecentry = \numsecentry
+    \openin 1 \tocreadfilename\space
+    \ifeof 1 \else
+      \readtocfile
+    \fi
+    \closein 1
+    \vfill \eject
+    \contentsalignmacro % in case @setchapternewpage odd is in effect
+  \endgroup
+  \lastnegativepageno = \pageno
+  \global\pageno = \savepageno
+}
+\let\shortcontents = \summarycontents
+
+% Typeset the label for a chapter or appendix for the short contents.
+% The arg is, e.g., `A' for an appendix, or `3' for a chapter.
+%
+\def\shortchaplabel#1{%
+  % This space should be enough, since a single number is .5em, and the
+  % widest letter (M) is 1em, at least in the Computer Modern fonts.
+  % But use \hss just in case.
+  % (This space doesn't include the extra space that gets added after
+  % the label; that gets put in by \shortchapentry below.)
+  %
+  % We'd like to right-justify chapter numbers, but that looks strange
+  % with appendix letters.  And right-justifying numbers and
+  % left-justifying letters looks strange when there are fewer than 10
+  % chapters.  Have to read the whole toc once to know how many chapters
+  % there are before deciding ...
+  \hbox to 1em{#1\hss}%
+}
+
+% These macros generate individual entries in the table of contents.
+% The first argument is the chapter or section name.
+% The last argument is the page number.
+% The arguments in between are the chapter number, section number, ...
+
+% Parts, in the main contents.  Replace the part number, which doesn't
+% exist, with an empty box.  Let's hope all the numbers have the same width.
+% Also ignore the page number, which is conventionally not printed.
+% \numeralbox is an empty box as wide as the digit `8'.
+\def\numeralbox{\setbox0=\hbox{8}\hbox to \wd0{\hfil}}
+\def\partentry#1#2#3#4{\dochapentry{\numeralbox\labelspace#1}{}}
+%
+% Parts, in the short toc.
+\def\shortpartentry#1#2#3#4{%
+  \penalty-300
+  \vskip.5\baselineskip plus.15\baselineskip minus.1\baselineskip
+  \shortchapentry{{\bf #1}}{\numeralbox}{}{}%
+}
+
+% Chapters, in the main contents.
+\def\numchapentry#1#2#3#4{\dochapentry{#2\labelspace#1}{#4}}
+
+% Chapters, in the short toc.
+% See comments in \dochapentry re vbox and related settings.
+\def\shortchapentry#1#2#3#4{%
+  \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno\bgroup#4\egroup}%
+}
+
+% Appendices, in the main contents.
+% Need the word Appendix, and a fixed-size box.
+%
+\def\appendixbox#1{%
+  % We use M since it's probably the widest letter.
+  \setbox0 = \hbox{\putwordAppendix{} M}%
+  \hbox to \wd0{\putwordAppendix{} #1\hss}}
+%
+\def\appentry#1#2#3#4{\dochapentry{\appendixbox{#2}\hskip.7em#1}{#4}}
+
+% Unnumbered chapters.
+\def\unnchapentry#1#2#3#4{\dochapentry{#1}{#4}}
+\def\shortunnchapentry#1#2#3#4{\tocentry{#1}{\doshortpageno\bgroup#4\egroup}}
+
+% Sections.
+\def\numsecentry#1#2#3#4{\dosecentry{#2\labelspace#1}{#4}}
+\let\appsecentry=\numsecentry
+\def\unnsecentry#1#2#3#4{\dosecentry{#1}{#4}}
+
+% Subsections.
+\def\numsubsecentry#1#2#3#4{\dosubsecentry{#2\labelspace#1}{#4}}
+\let\appsubsecentry=\numsubsecentry
+\def\unnsubsecentry#1#2#3#4{\dosubsecentry{#1}{#4}}
+
+% And subsubsections.
+\def\numsubsubsecentry#1#2#3#4{\dosubsubsecentry{#2\labelspace#1}{#4}}
+\let\appsubsubsecentry=\numsubsubsecentry
+\def\unnsubsubsecentry#1#2#3#4{\dosubsubsecentry{#1}{#4}}
+
+% This parameter controls the indentation of the various levels.
+% Same as \defaultparindent.
+\newdimen\tocindent \tocindent = 15pt
+
+% Now for the actual typesetting. In all these, #1 is the text and #2 is the
+% page number.
+%
+% If the toc has to be broken over pages, we want it to be at chapters
+% if at all possible; hence the \penalty.
+\def\dochapentry#1#2{%
+   \penalty-300 \vskip1\baselineskip plus.33\baselineskip minus.25\baselineskip
+   \begingroup
+     % Move the page numbers slightly to the right
+     \advance\entryrightmargin by -0.05em
+     \chapentryfonts
+     \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+   \endgroup
+   \nobreak\vskip .25\baselineskip plus.1\baselineskip
+}
+
+% Each deeper level is indented by one more multiple of \tocindent.
+\def\dosecentry#1#2{\begingroup
+  \secentryfonts \leftskip=\tocindent
+  \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+\endgroup}
+
+\def\dosubsecentry#1#2{\begingroup
+  \subsecentryfonts \leftskip=2\tocindent
+  \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+\endgroup}
+
+\def\dosubsubsecentry#1#2{\begingroup
+  \subsubsecentryfonts \leftskip=3\tocindent
+  \tocentry{#1}{\dopageno\bgroup#2\egroup}%
+\endgroup}
+
+% We use the same \entry macro as for the index entries.
+\let\tocentry = \entry
+
+% Space between chapter (or whatever) number and the title.
+\def\labelspace{\hskip1em \relax}
+
+\def\dopageno#1{{\rm #1}}
+\def\doshortpageno#1{{\rm #1}}
+
+\def\chapentryfonts{\secfonts \rm}
+\def\secentryfonts{\textfonts}
+\def\subsecentryfonts{\textfonts}
+\def\subsubsecentryfonts{\textfonts}
+
+
+\message{environments,}
+% @foo ... @end foo.
+
+% @tex ... @end tex    escapes into raw TeX temporarily.
+% One exception: @ is still an escape character, so that @end tex works.
+% But \@ or @@ will get a plain @ character.
+
+\envdef\tex{%
+  \setupmarkupstyle{tex}%
+  \catcode `\\=0 \catcode `\{=1 \catcode `\}=2
+  \catcode `\$=3 \catcode `\&=4 \catcode `\#=6
+  \catcode `\^=7 \catcode `\_=8 \catcode `\~=\active \let~=\tie
+  \catcode `\%=14
+  \catcode `\+=\other
+  \catcode `\"=\other
+  \catcode `\|=\other
+  \catcode `\<=\other
+  \catcode `\>=\other
+  \catcode `\`=\other
+  \catcode `\'=\other
+  %
+  % ' is active in math mode (mathcode"8000).  So reset it, and all our
+  % other math active characters (just in case), to plain's definitions.
+  \mathactive
+  %
+  % Inverse of the list at the beginning of the file.
+  \let\b=\ptexb
+  \let\bullet=\ptexbullet
+  \let\c=\ptexc
+  \let\,=\ptexcomma
+  \let\.=\ptexdot
+  \let\dots=\ptexdots
+  \let\equiv=\ptexequiv
+  \let\!=\ptexexclam
+  \let\i=\ptexi
+  \let\indent=\ptexindent
+  \let\noindent=\ptexnoindent
+  \let\{=\ptexlbrace
+  \let\+=\tabalign
+  \let\}=\ptexrbrace
+  \let\/=\ptexslash
+  \let\sp=\ptexsp
+  \let\*=\ptexstar
+  %\let\sup=\ptexsup % do not redefine, we want @sup to work in math mode
+  \let\t=\ptext
+  \expandafter \let\csname top\endcsname=\ptextop  % we've made it outer
+  \let\frenchspacing=\plainfrenchspacing
+  %
+  \def\endldots{\mathinner{\ldots\ldots\ldots\ldots}}%
+  \def\enddots{\relax\ifmmode\endldots\else$\mathsurround=0pt \endldots\,$\fi}%
+  \def\@{@}%
+}
+% There is no need to define \Etex.
+
+% Define @lisp ... @end lisp.
+% @lisp environment forms a group so it can rebind things,
+% including the definition of @end lisp (which normally is erroneous).
+
+% Amount to narrow the margins by for @lisp.
+\newskip\lispnarrowing \lispnarrowing=0.4in
+
+% This is the definition that ^^M gets inside @lisp, @example, and other
+% such environments.  \null is better than a space, since it doesn't
+% have any width.
+\def\lisppar{\null\endgraf}
+
+% This space is always present above and below environments.
+\newskip\envskipamount \envskipamount = 0pt
+
+% Make spacing above and below environments symmetrical.  We use \parskip
+% here to help in doing that, since in @example-like environments \parskip
+% is reset to zero; thus the \afterenvbreak inserts no space -- but the
+% start of the next paragraph will insert \parskip.
+%
+\def\aboveenvbreak{{%
+  % =10000 instead of <10000 because of a special case in \itemzzz and
+  % \sectionheading, q.v.
+  \ifnum \lastpenalty=10000 \else
+    \advance\envskipamount by \parskip
+    \endgraf
+    \ifdim\lastskip<\envskipamount
+      \removelastskip
+      \ifnum\lastpenalty<10000
+        % Penalize breaking before the environment, because preceding text
+        % often leads into it.
+        \penalty100
+      \fi
+      \vskip\envskipamount
+    \fi
+  \fi
+}}
+
+\def\afterenvbreak{{%
+  % =10000 instead of <10000 because of a special case in \itemzzz and
+  % \sectionheading, q.v.
+  \ifnum \lastpenalty=10000 \else
+    \advance\envskipamount by \parskip
+    \endgraf
+    \ifdim\lastskip<\envskipamount
+      \removelastskip
+      % it's not a good place to break if the last penalty was \nobreak
+      % or better ...
+      \ifnum\lastpenalty<10000 \penalty-50 \fi
+      \vskip\envskipamount
+    \fi
+  \fi
+}}
+
+% \nonarrowing is a flag.  If "set", @lisp etc don't narrow margins; it will
+% also clear it, so that its embedded environments do the narrowing again.
+\let\nonarrowing=\relax
+
+% @cartouche ... @end cartouche: draw rectangle w/rounded corners around
+% environment contents.
+\font\circle=lcircle10
+\newdimen\circthick
+\newdimen\cartouter\newdimen\cartinner
+\newskip\normbskip\newskip\normpskip\newskip\normlskip
+\circthick=\fontdimen8\circle
+%
+% The four corner pieces: top-left, top-right, bottom-left, bottom-right.
+\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth
+\def\ctr{{\hskip 6pt\circle\char'010}}
+\def\cbl{{\circle\char'012\hskip -6pt}}
+\def\cbr{{\hskip 6pt\circle\char'011}}
+\def\carttop{\hbox to \cartouter{\hskip\lskip
+        \ctl\leaders\hrule height\circthick\hfil\ctr
+        \hskip\rskip}}
+\def\cartbot{\hbox to \cartouter{\hskip\lskip
+        \cbl\leaders\hrule height\circthick\hfil\cbr
+        \hskip\rskip}}
+%
+\newskip\lskip\newskip\rskip
+
+\envdef\cartouche{%
+  \ifhmode\par\fi  % can't be in the midst of a paragraph.
+  \startsavinginserts
+  \lskip=\leftskip \rskip=\rightskip
+  \leftskip=0pt\rightskip=0pt % we want these *outside*.
+  \cartinner=\hsize \advance\cartinner by-\lskip
+  \advance\cartinner by-\rskip
+  \cartouter=\hsize
+  \advance\cartouter by 18.4pt  % allow for 3pt kerns on either
+                                % side, and for 6pt waste from
+                                % each corner char, and rule thickness
+  \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip
+  %
+  % If this cartouche directly follows a sectioning command, we need the
+  % \parskip glue (backspaced over by default) or the cartouche can
+  % collide with the section heading.
+  \ifnum\lastpenalty>10000 \vskip\parskip \penalty\lastpenalty \fi
+  %
+  % The boxes opened here are closed, in reverse order, by \Ecartouche.
+  \setbox\groupbox=\vbox\bgroup
+      \baselineskip=0pt\parskip=0pt\lineskip=0pt
+      \carttop
+      \hbox\bgroup
+          \hskip\lskip
+          \vrule\kern3pt
+          \vbox\bgroup
+              \kern3pt
+              \hsize=\cartinner
+              \baselineskip=\normbskip
+              \lineskip=\normlskip
+              \parskip=\normpskip
+              \vskip -\parskip
+              \comment % For explanation, see the end of def\group.
+}
+\def\Ecartouche{%
+              \ifhmode\par\fi
+              \kern3pt
+          \egroup
+          \kern3pt\vrule
+          \hskip\rskip
+      \egroup
+      \cartbot
+  \egroup
+  \addgroupbox
+  \checkinserts
+}
+
+
+% This macro is called at the beginning of all the @example variants,
+% inside a group.
+\newdimen\nonfillparindent
+\def\nonfillstart{%
+  \aboveenvbreak
+  \ifdim\hfuzz < 12pt \hfuzz = 12pt \fi % Don't be fussy
+  \sepspaces % Make spaces be word-separators rather than space tokens.
+  \let\par = \lisppar % don't ignore blank lines
+  \obeylines % each line of input is a line of output
+  \parskip = 0pt
+  % Turn off paragraph indentation but redefine \indent to emulate
+  % the normal \indent.
+  \nonfillparindent=\parindent
+  \parindent = 0pt
+  \let\indent\nonfillindent
+  %
+  \emergencystretch = 0pt % don't try to avoid overfull boxes
+  \ifx\nonarrowing\relax
+    \advance \leftskip by \lispnarrowing
+    \exdentamount=\lispnarrowing
+  \else
+    \let\nonarrowing = \relax
+  \fi
+  \let\exdent=\nofillexdent
+}
+
+\begingroup
+\obeyspaces
+% We want to swallow spaces (but not other tokens) after the fake
+% @indent in our nonfill-environments, where spaces are normally
+% active and set to @tie, resulting in them not being ignored after
+% @indent.
+\gdef\nonfillindent{\futurelet\temp\nonfillindentcheck}%
+\gdef\nonfillindentcheck{%
+\ifx\temp %
+\expandafter\nonfillindentgobble%
+\else%
+\leavevmode\nonfillindentbox%
+\fi%
+}%
+\endgroup
+\def\nonfillindentgobble#1{\nonfillindent}
+\def\nonfillindentbox{\hbox to \nonfillparindent{\hss}}
+
+% If you want all examples etc. small: @set dispenvsize small.
+% If you want even small examples the full size: @set dispenvsize nosmall.
+% This affects the following displayed environments:
+%    @example, @display, @format, @lisp
+%
+\def\smallword{small}
+\def\nosmallword{nosmall}
+\let\SETdispenvsize\relax
+\def\setnormaldispenv{%
+  \ifx\SETdispenvsize\smallword
+    % end paragraph for sake of leading, in case document has no blank
+    % line.  This is redundant with what happens in \aboveenvbreak, but
+    % we need to do it before changing the fonts, and it's inconvenient
+    % to change the fonts afterward.
+    \ifnum \lastpenalty=10000 \else \endgraf \fi
+    \smallexamplefonts \rm
+  \fi
+}
+\def\setsmalldispenv{%
+  \ifx\SETdispenvsize\nosmallword
+  \else
+    \ifnum \lastpenalty=10000 \else \endgraf \fi
+    \smallexamplefonts \rm
+  \fi
+}
+
+% We often define two environments, @foo and @smallfoo.
+% Let's do it in one command.  #1 is the env name, #2 the definition.
+\def\makedispenvdef#1#2{%
+  \expandafter\envdef\csname#1\endcsname {\setnormaldispenv #2}%
+  \expandafter\envdef\csname small#1\endcsname {\setsmalldispenv #2}%
+  % Both variants end the same way, with \afterenvbreak.
+  \expandafter\let\csname E#1\endcsname \afterenvbreak
+  \expandafter\let\csname Esmall#1\endcsname \afterenvbreak
+}
+
+% Define two environment synonyms (#1 and #2) for an environment.
+% #3 is the shared definition.
+\def\maketwodispenvdef#1#2#3{%
+  \makedispenvdef{#1}{#3}%
+  \makedispenvdef{#2}{#3}%
+}
+%
+% @lisp: indented, narrowed, typewriter font;
+% @example: same as @lisp.
+%
+% @smallexample and @smalllisp: use smaller fonts.
+% Originally contributed by Pavel@xerox.
+%
+\maketwodispenvdef{lisp}{example}{%
+  \nonfillstart
+  \tt\setupmarkupstyle{example}%
+  \let\kbdfont = \kbdexamplefont % Allow @kbd to do something special.
+  \gobble % eat return
+}
+% @display/@smalldisplay: same as @lisp except keep current font.
+%
+\makedispenvdef{display}{%
+  \nonfillstart
+  \gobble
+}
+
+% @format/@smallformat: same as @display except don't narrow margins.
+%
+\makedispenvdef{format}{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \gobble
+}
+
+% @flushleft: same as @format, but doesn't obey \SETdispenvsize.
+\envdef\flushleft{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \gobble
+}
+\let\Eflushleft = \afterenvbreak
+
+% @flushright: like @flushleft, but with stretchable glue added to
+% \leftskip.
+%
+\envdef\flushright{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \advance\leftskip by 0pt plus 1fill\relax
+  \gobble
+}
+\let\Eflushright = \afterenvbreak
+
+
+% @raggedright does more-or-less normal line breaking but no right
+% justification.  From plain.tex.  Don't stretch around special
+% characters in urls in this environment, since the stretch at the right
+% should be enough.
+\envdef\raggedright{%
+  \rightskip0pt plus2.4em \spaceskip.3333em \xspaceskip.5em\relax
+  \def\urefprestretchamount{0pt}%
+  \def\urefpoststretchamount{0pt}%
+}
+\let\Eraggedright\par
+
+% @raggedleft: the stretch goes into \leftskip instead of \rightskip.
+\envdef\raggedleft{%
+  \parindent=0pt \leftskip0pt plus2em
+  \spaceskip.3333em \xspaceskip.5em \parfillskip=0pt
+  \hbadness=10000 % Last line will usually be underfull, so turn off
+                  % badness reporting.
+}
+\let\Eraggedleft\par
+
+% @raggedcenter: equal stretch in \leftskip and \rightskip.
+\envdef\raggedcenter{%
+  \parindent=0pt \rightskip0pt plus1em \leftskip0pt plus1em
+  \spaceskip.3333em \xspaceskip.5em \parfillskip=0pt
+  \hbadness=10000 % Last line will usually be underfull, so turn off
+                  % badness reporting.
+}
+\let\Eraggedcenter\par
+
+
+% @quotation does normal linebreaking (hence we can't use \nonfillstart)
+% and narrows the margins.  We keep \parskip nonzero in general, since
+% we're doing normal filling.  So, when using \aboveenvbreak and
+% \afterenvbreak, temporarily make \parskip 0.
+%
+\makedispenvdef{quotation}{\quotationstart}
+%
+\def\quotationstart{%
+  \indentedblockstart % same as \indentedblock, but increase right margin too.
+  \ifx\nonarrowing\relax
+    \advance\rightskip by \lispnarrowing
+  \fi
+  \parsearg\quotationlabel
+}
+
+% We have retained a nonzero parskip for the environment, since we're
+% doing normal filling.
+%
+\def\Equotation{%
+  \par
+  \ifx\quotationauthor\thisisundefined\else
+    % indent a bit.
+    \leftline{\kern 2\leftskip \sl ---\quotationauthor}%
+  \fi
+  {\parskip=0pt \afterenvbreak}%
+}
+\def\Esmallquotation{\Equotation}
+
+% If we're given an argument, typeset it in bold with a colon after.
+\def\quotationlabel#1{%
+  \def\temp{#1}%
+  \ifx\temp\empty \else
+    {\bf #1: }%
+  \fi
+}
+
+% @indentedblock is like @quotation, but indents only on the left and
+% has no optional argument.
+%
+\makedispenvdef{indentedblock}{\indentedblockstart}
+%
+\def\indentedblockstart{%
+  {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip
+  \parindent=0pt
+  %
+  % @cartouche defines \nonarrowing to inhibit narrowing at next level down.
+  \ifx\nonarrowing\relax
+    \advance\leftskip by \lispnarrowing
+    \exdentamount = \lispnarrowing
+  \else
+    \let\nonarrowing = \relax
+  \fi
+}
+
+% Keep a nonzero parskip for the environment, since we're doing normal filling.
+%
+\def\Eindentedblock{%
+  \par
+  {\parskip=0pt \afterenvbreak}%
+}
+\def\Esmallindentedblock{\Eindentedblock}
+
+
+% LaTeX-like @verbatim...@end verbatim and @verb{<char>...<char>}
+% If we want to allow any <char> as delimiter,
+% we need the curly braces so that makeinfo sees the @verb command, eg:
+% `@verbx...x' would look like the '@verbx' command.  --janneke@gnu.org
+%
+% [Knuth]: Donald Ervin Knuth, 1996.  The TeXbook.
+%
+% [Knuth] p.344; only we need to do the other characters Texinfo sets
+% active too.  Otherwise, they get lost as the first character on a
+% verbatim line.
+\def\dospecials{%
+  \do\ \do\\\do\{\do\}\do\$\do\&%
+  \do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~%
+  \do\<\do\>\do\|\do\@\do+\do\"%
+  % Don't do the quotes -- if we do, @set txicodequoteundirected and
+  % @set txicodequotebacktick will not have effect on @verb and
+  % @verbatim, and ?` and !` ligatures won't get disabled.
+  %\do\`\do\'%
+}
+%
+% [Knuth] p. 380
+% Give every character listed in \dospecials the catcode \other.
+\def\uncatcodespecials{%
+  \def\do##1{\catcode`##1=\other}\dospecials}
+%
+% Setup for the @verb command.
+%
+% Eight spaces for a tab
+\begingroup
+  \catcode`\^^I=\active
+  \gdef\tabeightspaces{\catcode`\^^I=\active\def^^I{\ \ \ \ \ \ \ \ }}
+\endgroup
+%
+\def\setupverb{%
+  \tt  % easiest (and conventionally used) font for verbatim
+  \def\par{\leavevmode\endgraf}%
+  \setupmarkupstyle{verb}%
+  \tabeightspaces
+  % Respect line breaks,
+  % print special symbols as themselves, and
+  % make each space count
+  % must do in this order:
+  \obeylines \uncatcodespecials \sepspaces
+}
+
+% Setup for the @verbatim environment
+%
+% Real tab expansion.
+\newdimen\tabw \setbox0=\hbox{\tt\space} \tabw=8\wd0 % tab amount
+%
+% We typeset each line of the verbatim in an \hbox, so we can handle
+% tabs.  The \global is in case the verbatim line starts with an accent,
+% or some other command that starts with a begin-group.  Otherwise, the
+% entire \verbbox would disappear at the corresponding end-group, before
+% it is typeset.  Meanwhile, we can't have nested verbatim commands
+% (can we?), so the \global won't be overwriting itself.
+\newbox\verbbox
+\def\starttabbox{\global\setbox\verbbox=\hbox\bgroup}
+%
+\begingroup
+  \catcode`\^^I=\active
+  \gdef\tabexpand{%
+    \catcode`\^^I=\active
+    \def^^I{\leavevmode\egroup
+      \dimen\verbbox=\wd\verbbox % the width so far, or since the previous tab
+      \divide\dimen\verbbox by\tabw
+      \multiply\dimen\verbbox by\tabw % compute previous multiple of \tabw
+      \advance\dimen\verbbox by\tabw  % advance to next multiple of \tabw
+      \wd\verbbox=\dimen\verbbox \box\verbbox \starttabbox
+    }%
+  }
+\endgroup
+
+% start the verbatim environment.
+\def\setupverbatim{%
+  \let\nonarrowing = t%
+  \nonfillstart
+  \tt % easiest (and conventionally used) font for verbatim
+  % The \leavevmode here is for blank lines.  Otherwise, we would
+  % never \starttabbox and the \egroup would end verbatim mode.
+  \def\par{\leavevmode\egroup\box\verbbox\endgraf}%
+  \tabexpand
+  \setupmarkupstyle{verbatim}%
+  % Respect line breaks,
+  % print special symbols as themselves, and
+  % make each space count.
+  % Must do in this order:
+  \obeylines \uncatcodespecials \sepspaces
+  \everypar{\starttabbox}%
+}
+
+% Do the @verb magic: verbatim text is quoted by unique
+% delimiter characters.  Before first delimiter expect an
+% opening brace, after last delimiter expect closing brace:
+%
+%    \def\doverb'{'<char>#1<char>'}'{#1}
+%
+% [Knuth] p. 382; only eat outer {}
+\begingroup
+  \catcode`[=1\catcode`]=2\catcode`\{=\other\catcode`\}=\other
+  \gdef\doverb{#1[\def\next##1#1}[##1\endgroup]\next]
+\endgroup
+%
+\def\verb{\begingroup\setupverb\doverb}
+%
+%
+% Do the @verbatim magic: define the macro \doverbatim so that
+% the (first) argument ends when '@end verbatim' is reached, ie:
+%
+%     \def\doverbatim#1@end verbatim{#1}
+%
+% For Texinfo it's a lot easier than for LaTeX,
+% because texinfo's \verbatim doesn't stop at '\end{verbatim}':
+% we need not redefine '\', '{' and '}'.
+%
+% Inspired by LaTeX's verbatim command set [latex.ltx]
+%
+\begingroup
+  \catcode`\ =\active
+  \obeylines %
+  % ignore everything up to the first ^^M, that's the newline at the end
+  % of the @verbatim input line itself.  Otherwise we get an extra blank
+  % line in the output.
+  \xdef\doverbatim#1^^M#2@end verbatim{#2\noexpand\end\gobble verbatim}%
+  % We really want {...\end verbatim} in the body of the macro, but
+  % without the active space; thus we have to use \xdef and \gobble.
+\endgroup
+%
+\envdef\verbatim{%
+    \setupverbatim\doverbatim
+}
+\let\Everbatim = \afterenvbreak
+
+
+% @verbatiminclude FILE - insert text of file in verbatim environment.
+%
+\def\verbatiminclude{\parseargusing\filenamecatcodes\doverbatiminclude}
+%
+\def\doverbatiminclude#1{%
+  {%
+    \makevalueexpandable
+    \setupverbatim
+    \indexnofonts       % Allow `@@' and other weird things in file names.
+    \wlog{texinfo.tex: doing @verbatiminclude of #1^^J}%
+    \input #1
+    \afterenvbreak
+  }%
+}
+
+% @copying ... @end copying.
+% Save the text away for @insertcopying later.
+%
+% We save the uninterpreted tokens, rather than creating a box.
+% Saving the text in a box would be much easier, but then all the
+% typesetting commands (@smallbook, font changes, etc.) have to be done
+% beforehand -- and a) we want @copying to be done first in the source
+% file; b) letting users define the frontmatter in as flexible order as
+% possible is desirable.
+%
+\def\copying{\checkenv{}\begingroup\scanargctxt\docopying}
+\def\docopying#1@end copying{\endgroup\def\copyingtext{#1}}
+%
+\def\insertcopying{%
+  \begingroup
+    \parindent = 0pt  % paragraph indentation looks wrong on title page
+    \scanexp\copyingtext
+  \endgroup
+}
+
+
+\message{defuns,}
+% @defun etc.
+
+\newskip\defbodyindent \defbodyindent=.4in
+\newskip\defargsindent \defargsindent=50pt
+\newskip\deflastargmargin \deflastargmargin=18pt
+\newcount\defunpenalty
+
+% Start the processing of @deffn:
+\def\startdefun{%
+  \ifnum\lastpenalty<10000
+    \medbreak
+    \defunpenalty=10003 % Will keep this @deffn together with the
+                        % following @def command, see below.
+  \else
+    % If there are two @def commands in a row, we'll have a \nobreak,
+    % which is there to keep the function description together with its
+    % header.  But if there's nothing but headers, we need to allow a
+    % break somewhere.  Check specifically for penalty 10002, inserted
+    % by \printdefunline, instead of 10000, since the sectioning
+    % commands also insert a nobreak penalty, and we don't want to allow
+    % a break between a section heading and a defun.
+    %
+    % As a further refinement, we avoid "club" headers by signalling
+    % with penalty of 10003 after the very first @deffn in the
+    % sequence (see above), and penalty of 10002 after any following
+    % @def command.
+    \ifnum\lastpenalty=10002 \penalty2000 \else \defunpenalty=10002 \fi
+    %
+    % Similarly, after a section heading, do not allow a break.
+    % But do insert the glue.
+    \medskip  % preceded by discardable penalty, so not a breakpoint
+  \fi
+  %
+  \parindent=0in
+  \advance\leftskip by \defbodyindent
+  \exdentamount=\defbodyindent
+}
+
+% \dodefunx \deffn: process a @deffnx line; #1 is the matching
+% environment macro.
+\def\dodefunx#1{%
+  % First, check whether we are in the right environment:
+  \checkenv#1%
+  %
+  % As above, allow line break if we have multiple x headers in a row.
+  % It's not a great place, though.
+  \ifnum\lastpenalty=10002 \penalty3000 \else \defunpenalty=10002 \fi
+  %
+  % And now, it's time to reuse the body of the original defun:
+  \expandafter\gobbledefun#1%
+}
+\def\gobbledefun#1\startdefun{}
+
+% \printdefunline \deffnheader{text}
+%
+\def\printdefunline#1#2{%
+  \begingroup
+    % call \deffnheader:
+    #1#2 \endheader
+    % common ending:
+    \interlinepenalty = 10000
+    \advance\rightskip by 0pt plus 1fil\relax
+    \endgraf
+    \nobreak\vskip -\parskip
+    \penalty\defunpenalty  % signal to \startdefun and \dodefunx
+    % Some of the @defun-type tags do not enable magic parentheses,
+    % rendering the following check redundant.  But we don't optimize.
+    \checkparencounts
+  \endgroup
+}
+
+\def\Edefun{\endgraf\medbreak}
+
+% \makedefun{deffn} creates \deffn, \deffnx and \Edeffn;
+% the only thing remaining is to define \deffnheader.
+%
+\def\makedefun#1{%
+  \expandafter\let\csname E#1\endcsname = \Edefun
+  \edef\temp{\noexpand\domakedefun
+    \makecsname{#1}\makecsname{#1x}\makecsname{#1header}}%
+  \temp
+}
+
+% \domakedefun \deffn \deffnx \deffnheader { (defn. of \deffnheader) }
+%
+% Define \deffn and \deffnx, without parameters.
+% \deffnheader has to be defined explicitly.
+%
+\def\domakedefun#1#2#3{%
+  \envdef#1{%
+    \startdefun
+    \doingtypefnfalse    % distinguish typed functions from all else
+    \parseargusing\activeparens{\printdefunline#3}%
+  }%
+  \def#2{\dodefunx#1}%
+  % The trailing \def#3 is left open on purpose: the text that follows
+  % the \makedefun call supplies the parameter text and body.
+  \def#3%
+}
+
+\newif\ifdoingtypefn       % doing typed function?
\newif\ifrettypeownline  % typeset return type on its own line?

% @deftypefnnewline on|off says whether the return type of a typed
% function is printed on its own line.  This affects @deftypefn,
% @deftypefun, @deftypeop, and @deftypemethod.
%
% The choice is recorded in the flag \SETtxideftypefnnl: \empty for
% "on", \relax for "off".  \defname tests that flag against \relax.
%
\parseargdef\deftypefnnewline{%
  \def\temp{#1}%
  \ifx\temp\onword
    \expandafter\let\csname SETtxideftypefnnl\endcsname
      = \empty
  \else\ifx\temp\offword
    \expandafter\let\csname SETtxideftypefnnl\endcsname
      = \relax
  \else
    \errhelp = \EMsimple
    % Name the command the user actually typed, not the internal flag.
    \errmessage{Unknown @deftypefnnewline value `\temp',
                must be on|off}%
  \fi\fi
}

% Untyped functions:

% @deffn category name args
\makedefun{deffn}{\deffngeneral{}}

% @defop category class name args
\makedefun{defop}#1 {\defopon{#1\ \putwordon}}

% \defopon {category on}class name args
\def\defopon#1#2 {\deffngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} }

% \deffngeneral {subind}category name args
%
\def\deffngeneral#1#2 #3 #4\endheader{%
  % Remember that \dosubind{fn}{foo}{} is equivalent to \doind{fn}{foo}.
  \dosubind{fn}{\code{#3}}{#1}%
  \defname{#2}{}{#3}\magicamp\defunargs{#4\unskip}%
}

% Typed functions:

% @deftypefn category type name args
\makedefun{deftypefn}{\deftypefngeneral{}}

% @deftypeop category class type name args
\makedefun{deftypeop}#1 {\deftypeopon{#1\ \putwordon}}

% \deftypeopon {category on}class type name args
\def\deftypeopon#1#2 {\deftypefngeneral{\putwordon\ \code{#2}}{#1\ \code{#2}} }

% \deftypefngeneral {subind}category type name args
%
\def\deftypefngeneral#1#2 #3 #4 #5\endheader{%
  \dosubind{fn}{\code{#4}}{#1}%
  \doingtypefntrue
  \defname{#2}{#3}{#4}\defunargs{#5\unskip}%
}

% Typed variables:

% @deftypevr category type var args
\makedefun{deftypevr}{\deftypecvgeneral{}}

% @deftypecv category class type var args
\makedefun{deftypecv}#1 {\deftypecvof{#1\ \putwordof}}

% \deftypecvof {category of}class type var args
\def\deftypecvof#1#2 {\deftypecvgeneral{\putwordof\ \code{#2}}{#1\ \code{#2}} }

% \deftypecvgeneral {subind}category type var args
%
\def\deftypecvgeneral#1#2 #3 #4 #5\endheader{%
  \dosubind{vr}{\code{#4}}{#1}%
  \defname{#2}{#3}{#4}\defunargs{#5\unskip}%
}

% Untyped variables:

% @defvr category var args
\makedefun{defvr}#1 {\deftypevrheader{#1} {} }

% @defcv category class var args
\makedefun{defcv}#1 {\defcvof{#1\ \putwordof}}

% \defcvof {category of}class var args
\def\defcvof#1#2 {\deftypecvof{#1}#2 {} }

% Types:

% @deftp category name args
\makedefun{deftp}#1 #2 #3\endheader{%
  \doind{tp}{\code{#2}}%
  \defname{#1}{}{#2}\defunargs{#3\unskip}%
}

% Remaining @defun-like shortcuts.  The space before each closing brace
% is significant: it is the delimiter the header macros expect after the
% category argument.
\makedefun{defun}{\deffnheader{\putwordDeffunc} }
\makedefun{defmac}{\deffnheader{\putwordDefmac} }
\makedefun{defspec}{\deffnheader{\putwordDefspec} }
\makedefun{deftypefun}{\deftypefnheader{\putwordDeffunc} }
\makedefun{defvar}{\defvrheader{\putwordDefvar} }
\makedefun{defopt}{\defvrheader{\putwordDefopt} }
\makedefun{deftypevar}{\deftypevrheader{\putwordDefvar} }
+\makedefun{defmethod}{\defopon\putwordMethodon} +\makedefun{deftypemethod}{\deftypeopon\putwordMethodon} +\makedefun{defivar}{\defcvof\putwordInstanceVariableof} +\makedefun{deftypeivar}{\deftypecvof\putwordInstanceVariableof} + +% \defname, which formats the name of the @def (not the args). +% #1 is the category, such as "Function". +% #2 is the return type, if any. +% #3 is the function name. +% +% We are followed by (but not passed) the arguments, if any. +% +\def\defname#1#2#3{% + \par + % Get the values of \leftskip and \rightskip as they were outside the @def... + \advance\leftskip by -\defbodyindent + % + % Determine if we are typesetting the return type of a typed function + % on a line by itself. + \rettypeownlinefalse + \ifdoingtypefn % doing a typed function specifically? + % then check user option for putting return type on its own line: + \expandafter\ifx\csname SETtxideftypefnnl\endcsname\relax \else + \rettypeownlinetrue + \fi + \fi + % + % How we'll format the category name. Putting it in brackets helps + % distinguish it from the body text that may end up on the next line + % just below it. + \def\temp{#1}% + \setbox0=\hbox{\kern\deflastargmargin \ifx\temp\empty\else [\rm\temp]\fi} + % + % Figure out line sizes for the paragraph shape. We'll always have at + % least two. + \tempnum = 2 + % + % The first line needs space for \box0; but if \rightskip is nonzero, + % we need only space for the part of \box0 which exceeds it: + \dimen0=\hsize \advance\dimen0 by -\wd0 \advance\dimen0 by \rightskip + % + % If doing a return type on its own line, we'll have another line. + \ifrettypeownline + \advance\tempnum by 1 + \def\maybeshapeline{0in \hsize}% + \else + \def\maybeshapeline{}% + \fi + % + % The continuations: + \dimen2=\hsize \advance\dimen2 by -\defargsindent + % + % The final paragraph shape: + \parshape \tempnum 0in \dimen0 \maybeshapeline \defargsindent \dimen2 + % + % Put the category name at the right margin. 
+ \noindent + \hbox to 0pt{% + \hfil\box0 \kern-\hsize + % \hsize has to be shortened this way: + \kern\leftskip + % Intentionally do not respect \rightskip, since we need the space. + }% + % + % Allow all lines to be underfull without complaint: + \tolerance=10000 \hbadness=10000 + \exdentamount=\defbodyindent + {% + % defun fonts. We use typewriter by default (used to be bold) because: + % . we're printing identifiers, they should be in tt in principle. + % . in languages with many accents, such as Czech or French, it's + % common to leave accents off identifiers. The result looks ok in + % tt, but exceedingly strange in rm. + % . we don't want -- and --- to be treated as ligatures. + % . this still does not fix the ?` and !` ligatures, but so far no + % one has made identifiers using them :). + \df \tt + \def\temp{#2}% text of the return type + \ifx\temp\empty\else + \tclose{\temp}% typeset the return type + \ifrettypeownline + % put return type on its own line; prohibit line break following: + \hfil\vadjust{\nobreak}\break + \else + \space % type on same line, so just followed by a space + \fi + \fi % no return type + #3% output function name + }% + {\rm\enskip}% hskip 0.5 em of \rmfont + % + \boldbrax + % arguments will be output next, if any. +} + +% Print arguments in slanted roman (not ttsl), inconsistently with using +% tt for the name. This is because literal text is sometimes needed in +% the argument list (groff manual), and ttsl and tt are not very +% distinguishable. Prevent hyphenation at `-' chars. +% +\def\defunargs#1{% + % use sl by default (not ttsl), + % tt for the names. + \df \sl \hyphenchar\font=0 + % + % On the other hand, if an argument has two dashes (for instance), we + % want a way to get ttsl. We used to recommend @var for that, so + % leave the code in, but it's strange for @var to lead to typewriter. + % Nowadays we recommend @code, since the difference between a ttsl hyphen + % and a tt hyphen is pretty tiny. 
% @code also disables ?` !`.
  \def\var##1{{\setupmarkupstyle{var}\ttslanted{##1}}}%
  #1%
  \sl\hyphenchar\font=45
}

% We want ()&[] to print specially on the defun line.
%
\def\activeparens{%
  \catcode`\(=\active \catcode`\)=\active
  \catcode`\[=\active \catcode`\]=\active
  \catcode`\&=\active
}

% Make control sequences which act like normal parenthesis chars.
\let\lparen = ( \let\rparen = )

% Be sure that we always have a definition for `(', etc.  For example,
% if the fn name has parens in it, \boldbrax will not be in effect yet,
% so TeX would otherwise complain about undefined control sequence.
{
  \activeparens
  \global\let(=\lparen \global\let)=\rparen
  \global\let[=\lbrack \global\let]=\rbrack
  \global\let& = \&

  \gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb}
  \gdef\magicamp{\let&=\amprm}
}

\newcount\parencount

% If we encounter &foo, then turn on ()-hacking afterwards.
% #1 is the keyword following the & (delimited by a space); it is printed
% in bold, preceded by a literal ampersand and followed by a space.
\newif\ifampseen
\def\amprm#1 {\ampseentrue{\bf\&#1 }}

% Font for a parenthesis, chosen from \ifampseen and the nesting depth.
\def\parenfont{%
  \ifampseen
    % At the first level, print parens in roman,
    % otherwise use the default font.
    \ifnum \parencount=1 \rm \fi
  \else
    % The \sf parens (in \boldbrax) actually are a little bolder than
    % the contained text.  This is especially needed for [ and ] .
    \sf
  \fi
}
% Execute #1 only after an &keyword was seen and at nesting depth 1.
\def\infirstlevel#1{%
  \ifampseen
    \ifnum\parencount=1
      #1%
    \fi
  \fi
}
\def\bfafterword#1 {#1 \bf}

% Active-character definitions of ( and ) installed by \boldbrax; they
% keep \parencount in step so \checkparencounts can detect imbalance.
\def\opnr{%
  \global\advance\parencount by 1
  {\parenfont(}%
  \infirstlevel \bfafterword
}
\def\clnr{%
  {\parenfont)}%
  \infirstlevel \sl
  \global\advance\parencount by -1
}

% Likewise for [ and ], counted in \brackcount.
\newcount\brackcount
\def\lbrb{%
  \global\advance\brackcount by 1
  {\bf[}%
}
\def\rbrb{%
  {\bf]}%
  \global\advance\brackcount by -1
}

\def\checkparencounts{%
  \ifnum\parencount=0 \else \badparencount \fi
  \ifnum\brackcount=0 \else \badbrackcount \fi
}
% these should not use \errmessage; the glibc manual, at least, actually
% has such constructs (when documenting function pointers).
+\def\badparencount{% + \message{Warning: unbalanced parentheses in @def...}% + \global\parencount=0 +} +\def\badbrackcount{% + \message{Warning: unbalanced square brackets in @def...}% + \global\brackcount=0 +} + + +\message{macros,} +% @macro. + +% To do this right we need a feature of e-TeX, \scantokens, +% which we arrange to emulate with a temporary file in ordinary TeX. +\ifx\eTeXversion\thisisundefined + \newwrite\macscribble + \def\scantokens#1{% + \toks0={#1}% + \immediate\openout\macscribble=\jobname.tmp + \immediate\write\macscribble{\the\toks0}% + \immediate\closeout\macscribble + \input \jobname.tmp + } +\fi + +\let\aftermacroxxx\relax +\def\aftermacro{\aftermacroxxx} + +% alias because \c means cedilla in @tex or @math +\let\texinfoc=\c + +\newcount\savedcatcodeone +\newcount\savedcatcodetwo + +% Used at the time of macro expansion. +% Argument is macro body with arguments substituted +\def\scanmacro#1{% + \newlinechar`\^^M + \def\xeatspaces{\eatspaces}% + % + % Temporarily undo catcode changes of \printindex. Set catcode of @ to + % 0 so that @-commands in macro expansions aren't printed literally when + % formatting an index file, where \ is used as the escape character. + \savedcatcodeone=\catcode`\@ + \savedcatcodetwo=\catcode`\\ + \catcode`\@=0 + \catcode`\\=\active + % + % Process the macro body under the current catcode regime. + \scantokens{#1@texinfoc}\aftermacro% + % + \catcode`\@=\savedcatcodeone + \catcode`\\=\savedcatcodetwo + % + % The \texinfoc is to remove the \newlinechar added by \scantokens, and + % can be noticed by \parsearg. + % The \aftermacro allows a \comment at the end of the macro definition + % to duplicate itself past the final \newlinechar added by \scantokens: + % this is used in the definition of \group to comment out a newline. We + % don't do the same for \c to support Texinfo files with macros that ended + % with a @c, which should no longer be necessary. 
+ % We avoid surrounding the call to \scantokens with \bgroup and \egroup + % to allow macros to open or close groups themselves. +} + +% Used for copying and captions +\def\scanexp#1{% + \expandafter\scanmacro\expandafter{#1}% +} + +\newcount\paramno % Count of parameters +\newtoks\macname % Macro name +\newif\ifrecursive % Is it recursive? + +% List of all defined macros in the form +% \commondummyword\macro1\commondummyword\macro2... +% Currently is also contains all @aliases; the list can be split +% if there is a need. +\def\macrolist{} + +% Add the macro to \macrolist +\def\addtomacrolist#1{\expandafter \addtomacrolistxxx \csname#1\endcsname} +\def\addtomacrolistxxx#1{% + \toks0 = \expandafter{\macrolist\commondummyword#1}% + \xdef\macrolist{\the\toks0}% +} + +% Utility routines. +% This does \let #1 = #2, with \csnames; that is, +% \let \csname#1\endcsname = \csname#2\endcsname +% (except of course we have to play expansion games). +% +\def\cslet#1#2{% + \expandafter\let + \csname#1\expandafter\endcsname + \csname#2\endcsname +} + +% Trim leading and trailing spaces off a string. +% Concepts from aro-bend problem 15 (see CTAN). +{\catcode`\@=11 +\gdef\eatspaces #1{\expandafter\trim@\expandafter{#1 }} +\gdef\trim@ #1{\trim@@ @#1 @ #1 @ @@} +\gdef\trim@@ #1@ #2@ #3@@{\trim@@@\empty #2 @} +\def\unbrace#1{#1} +\unbrace{\gdef\trim@@@ #1 } #2@{#1} +} + +% Trim a single trailing ^^M off a string. +{\catcode`\^^M=\other \catcode`\Q=3% +\gdef\eatcr #1{\eatcra #1Q^^MQ}% +\gdef\eatcra#1^^MQ{\eatcrb#1Q}% +\gdef\eatcrb#1Q#2Q{#1}% +} + +% Macro bodies are absorbed as an argument in a context where +% all characters are catcode 10, 11 or 12, except \ which is active +% (as in normal texinfo). It is necessary to change the definition of \ +% to recognize macro arguments; this is the job of \mbodybackslash. +% +% Non-ASCII encodings make 8-bit characters active, so un-activate +% them to avoid their expansion. 
Must do this non-globally, to +% confine the change to the current group. +% +% It's necessary to have hard CRs when the macro is executed. This is +% done by making ^^M (\endlinechar) catcode 12 when reading the macro +% body, and then making it the \newlinechar in \scanmacro. +% +\def\scanctxt{% used as subroutine + \catcode`\"=\other + \catcode`\+=\other + \catcode`\<=\other + \catcode`\>=\other + \catcode`\^=\other + \catcode`\_=\other + \catcode`\|=\other + \catcode`\~=\other + \passthroughcharstrue +} + +\def\scanargctxt{% used for copying and captions, not macros. + \scanctxt + \catcode`\@=\other + \catcode`\\=\other + \catcode`\^^M=\other +} + +\def\macrobodyctxt{% used for @macro definitions + \scanctxt + \catcode`\ =\other + \catcode`\@=\other + \catcode`\{=\other + \catcode`\}=\other + \catcode`\^^M=\other + \usembodybackslash +} + +% Used when scanning braced macro arguments. Note, however, that catcode +% changes here are ineffectual if the macro invocation was nested inside +% an argument to another Texinfo command. +\def\macroargctxt{% + \scanctxt + \catcode`\ =\active + \catcode`\^^M=\other + \catcode`\\=\active +} + +\def\macrolineargctxt{% used for whole-line arguments without braces + \scanctxt + \catcode`\{=\other + \catcode`\}=\other +} + +% \mbodybackslash is the definition of \ in @macro bodies. +% It maps \foo\ => \csname macarg.foo\endcsname => #N +% where N is the macro parameter number. +% We define \csname macarg.\endcsname to be \realbackslash, so +% \\ in macro replacement text gets you a backslash. 
%
{\catcode`@=0 @catcode`@\=@active
 @gdef@usembodybackslash{@let\=@mbodybackslash}
 @gdef@mbodybackslash#1\{@csname macarg.#1@endcsname}
}
\expandafter\def\csname macarg.\endcsname{\realbackslash}

\def\margbackslash#1{\char`\#1 }

% @macro and @rmacro differ only in the \ifrecursive flag they set
% before handing the rest of the line to \macroxxx.
\def\macro{\recursivefalse\parsearg\macroxxx}
\def\rmacro{\recursivetrue\parsearg\macroxxx}

% \macroxxx{NAME{PARAMS}} -- process the @macro/@rmacro line.
% Splits the line into \macname and \argl, parses the parameter list,
% records the macro as defined (saving any previous meaning of the name
% under macsave.NAME), and then starts reading the body in
% \macrobodyctxt.
\def\macroxxx#1{%
  \getargs{#1}%           now \macname is the macname and \argl the arglist
  \ifx\argl\empty       % no arguments
     \paramno=0\relax
  \else
     \expandafter\parsemargdef \argl;%
     % This must be a numeric test.  A plain \if would compare the two
     % tokens \paramno and >, which never match, so the check below
     % would be silently skipped.
     \ifnum\paramno>256\relax
       \ifx\eTeXversion\thisisundefined
         \errhelp = \EMsimple
         \errmessage{You need eTeX to compile a file with macros with more than 256 arguments}
       \fi
     \fi
  \fi
  \if1\csname ismacro.\the\macname\endcsname
     \message{Warning: redefining \the\macname}%
  \else
     \expandafter\ifx\csname \the\macname\endcsname \relax
     \else \errmessage{Macro name \the\macname\space already defined}\fi
     \global\cslet{macsave.\the\macname}{\the\macname}%
     \global\expandafter\let\csname ismacro.\the\macname\endcsname=1%
     \addtomacrolist{\the\macname}%
  \fi
  \begingroup \macrobodyctxt
  \ifrecursive \expandafter\parsermacbody
  \else \expandafter\parsemacbody
  \fi}

% @unmacro NAME -- restore the meaning saved in macsave.NAME, clear the
% ismacro.NAME marker, and drop NAME from \macrolist.
\parseargdef\unmacro{%
  \if1\csname ismacro.#1\endcsname
    \global\cslet{#1}{macsave.#1}%
    \global\expandafter\let \csname ismacro.#1\endcsname=0%
    % Remove the macro name from \macrolist:
    \begingroup
      \expandafter\let\csname#1\endcsname \relax
      \let\commondummyword\unmacrodo
      \xdef\macrolist{\macrolist}%
    \endgroup
  \else
    \errmessage{Macro #1 not defined}%
  \fi
}

% \unmacro temporarily substitutes this for \commondummyword while it
% re-expands \macrolist, so it is applied to each listed macro.  The idea
% is to omit any macro definitions that have been changed to \relax.
%
\def\unmacrodo#1{%
  \ifx #1\relax
    % remove this
  \else
    \noexpand\commondummyword \noexpand#1%
  \fi
}

% \getargs -- Parse the arguments to a @macro line.  Set \macname to
% the name of the macro, and \argl to the braced argument list.
\def\getargs#1{\getargsxxx#1{}}
\def\getargsxxx#1#{\getmacname #1 \relax\getmacargs}
\def\getmacname#1 #2\relax{\macname={#1}}
\def\getmacargs#1{\def\argl{#1}}
% This made use of the feature that if the last token of a
% <parameter list> is #, then the preceding argument is delimited by
% an opening brace, and that opening brace is not consumed.

% Parse the optional {params} list to @macro or @rmacro.
% Set \paramno to the number of arguments,
% and \paramlist to a parameter text for the macro (e.g. #1,#2,#3 for a
% three-param macro.)  Define \macarg.BLAH for each BLAH in the params
% list to some hook where the argument is to be expanded.  If there are
% fewer than 10 arguments that hook is to be replaced by ##N where N
% is the position in that list, that is to say the macro arguments are to be
% defined `a la TeX in the macro body.
%
% That gets used by \mbodybackslash (above).
%
% If there are 10 or more arguments, a different technique is used: see
% \parsemmanyargdef@@.
%
\def\parsemargdef#1;{%
  \paramno=0\def\paramlist{}%
  \let\hash\relax
  % \hash is redefined to `#' later to get it into definitions
  \let\xeatspaces\relax
  \parsemargdefxxx#1,;,%
  \ifnum\paramno<10\relax\else
    \paramno0\relax
    % The helper's name contains @, which only becomes a letter further
    % down the file (see \texiatcatcode).  Written literally here it
    % would be read as \parsemmanyargdef followed by two @ characters,
    % so build the name with \csname instead.
    \csname parsemmanyargdef@@\endcsname#1,;,% 10 or more arguments
  \fi
}

% End-of-list marker appended by \parsemargdef.
\def\parsemargdefend{;}

% Handle one comma-terminated parameter name #1, then loop via \next
% until the end marker is reached.
\def\parsemargdefxxx#1,{%
  % Compare the whole item with the marker.  Testing with \if#1; would
  % compare only the first two tokens, and so would mistake a parameter
  % name such as `aa' for the end of the list.
  \def\temp{#1}%
  \ifx\temp\parsemargdefend \let\next=\relax
  \else \let\next=\parsemargdefxxx
    \advance\paramno by 1
    \expandafter\edef\csname macarg.\eatspaces{#1}\endcsname
        {\xeatspaces{\hash\the\paramno}}%
    \edef\paramlist{\paramlist\hash\the\paramno,}%
  \fi\next}

% \parsemacbody, \parsermacbody
%
% Read recursive and nonrecursive macro bodies.  (They're different since
% rec and nonrec macros end differently.)
%
% We are in \macrobodyctxt, and the \xdef causes backslashes in the macro
% body to be transformed.
% Set \macrobody to the body of the macro, and call \defmacro.
+% +{\catcode`\ =\other\long\gdef\parsemacbody#1@end macro{% +\xdef\macrobody{\eatcr{#1}}\endgroup\defmacro}}% +{\catcode`\ =\other\long\gdef\parsermacbody#1@end rmacro{% +\xdef\macrobody{\eatcr{#1}}\endgroup\defmacro}}% + +% Make @ a letter, so that we can make private-to-Texinfo macro names. +\edef\texiatcatcode{\the\catcode`\@} +\catcode `@=11\relax + +%%%%%%%%%%%%%% Code for > 10 arguments only %%%%%%%%%%%%%%%%%% + +% If there are 10 or more arguments, a different technique is used, where the +% hook remains in the body, and when macro is to be expanded the body is +% processed again to replace the arguments. +% +% In that case, the hook is \the\toks N-1, and we simply set \toks N-1 to the +% argument N value and then \edef the body (nothing else will expand because of +% the catcode regime under which the body was input). +% +% If you compile with TeX (not eTeX), and you have macros with 10 or more +% arguments, no macro can have more than 256 arguments (else error). +% +% In case that there are 10 or more arguments we parse again the arguments +% list to set new definitions for the \macarg.BLAH macros corresponding to +% each BLAH argument. It was anyhow needed to parse already once this list +% in order to count the arguments, and as macros with at most 9 arguments +% are by far more frequent than macro with 10 or more arguments, defining +% twice the \macarg.BLAH macros does not cost too much processing power. +\def\parsemmanyargdef@@#1,{% + \if#1;\let\next=\relax + \else + \let\next=\parsemmanyargdef@@ + \edef\tempb{\eatspaces{#1}}% + \expandafter\def\expandafter\tempa + \expandafter{\csname macarg.\tempb\endcsname}% + % Note that we need some extra \noexpand\noexpand, this is because we + % don't want \the to be expanded in the \parsermacbody as it uses an + % \xdef . 
+ \expandafter\edef\tempa + {\noexpand\noexpand\noexpand\the\toks\the\paramno}% + \advance\paramno by 1\relax + \fi\next} + + +\let\endargs@\relax +\let\nil@\relax +\def\nilm@{\nil@}% +\long\def\nillm@{\nil@}% + +% This macro is expanded during the Texinfo macro expansion, not during its +% definition. It gets all the arguments' values and assigns them to macros +% macarg.ARGNAME +% +% #1 is the macro name +% #2 is the list of argument names +% #3 is the list of argument values +\def\getargvals@#1#2#3{% + \def\macargdeflist@{}% + \def\saveparamlist@{#2}% Need to keep a copy for parameter expansion. + \def\paramlist{#2,\nil@}% + \def\macroname{#1}% + \begingroup + \macroargctxt + \def\argvaluelist{#3,\nil@}% + \def\@tempa{#3}% + \ifx\@tempa\empty + \setemptyargvalues@ + \else + \getargvals@@ + \fi +} +\def\getargvals@@{% + \ifx\paramlist\nilm@ + % Some sanity check needed here that \argvaluelist is also empty. + \ifx\argvaluelist\nillm@ + \else + \errhelp = \EMsimple + \errmessage{Too many arguments in macro `\macroname'!}% + \fi + \let\next\macargexpandinbody@ + \else + \ifx\argvaluelist\nillm@ + % No more arguments values passed to macro. Set remaining named-arg + % macros to empty. + \let\next\setemptyargvalues@ + \else + % pop current arg name into \@tempb + \def\@tempa##1{\pop@{\@tempb}{\paramlist}##1\endargs@}% + \expandafter\@tempa\expandafter{\paramlist}% + % pop current argument value into \@tempc + \def\@tempa##1{\longpop@{\@tempc}{\argvaluelist}##1\endargs@}% + \expandafter\@tempa\expandafter{\argvaluelist}% + % Here \@tempb is the current arg name and \@tempc is the current arg value. 
+ % First place the new argument macro definition into \@tempd + \expandafter\macname\expandafter{\@tempc}% + \expandafter\let\csname macarg.\@tempb\endcsname\relax + \expandafter\def\expandafter\@tempe\expandafter{% + \csname macarg.\@tempb\endcsname}% + \edef\@tempd{\long\def\@tempe{\the\macname}}% + \push@\@tempd\macargdeflist@ + \let\next\getargvals@@ + \fi + \fi + \next +} + +\def\push@#1#2{% + \expandafter\expandafter\expandafter\def + \expandafter\expandafter\expandafter#2% + \expandafter\expandafter\expandafter{% + \expandafter#1#2}% +} + +% Replace arguments by their values in the macro body, and place the result +% in macro \@tempa. +% +\def\macvalstoargs@{% + % To do this we use the property that token registers that are \the'ed + % within an \edef expand only once. So we are going to place all argument + % values into respective token registers. + % + % First we save the token context, and initialize argument numbering. + \begingroup + \paramno0\relax + % Then, for each argument number #N, we place the corresponding argument + % value into a new token list register \toks#N + \expandafter\putargsintokens@\saveparamlist@,;,% + % Then, we expand the body so that argument are replaced by their + % values. The trick for values not to be expanded themselves is that they + % are within tokens and that tokens expand only once in an \edef . + \edef\@tempc{\csname mac.\macroname .body\endcsname}% + % Now we restore the token stack pointer to free the token list registers + % which we have used, but we make sure that expanded body is saved after + % group. + \expandafter + \endgroup + \expandafter\def\expandafter\@tempa\expandafter{\@tempc}% + } + +% Define the named-macro outside of this group and then close this group. +% +\def\macargexpandinbody@{% + \expandafter + \endgroup + \macargdeflist@ + % First the replace in body the macro arguments by their values, the result + % is in \@tempa . 
+ \macvalstoargs@ + % Then we point at the \norecurse or \gobble (for recursive) macro value + % with \@tempb . + \expandafter\let\expandafter\@tempb\csname mac.\macroname .recurse\endcsname + % Depending on whether it is recursive or not, we need some tailing + % \egroup . + \ifx\@tempb\gobble + \let\@tempc\relax + \else + \let\@tempc\egroup + \fi + % And now we do the real job: + \edef\@tempd{\noexpand\@tempb{\macroname}\noexpand\scanmacro{\@tempa}\@tempc}% + \@tempd +} + +\def\putargsintokens@#1,{% + \if#1;\let\next\relax + \else + \let\next\putargsintokens@ + % First we allocate the new token list register, and give it a temporary + % alias \@tempb . + \toksdef\@tempb\the\paramno + % Then we place the argument value into that token list register. + \expandafter\let\expandafter\@tempa\csname macarg.#1\endcsname + \expandafter\@tempb\expandafter{\@tempa}% + \advance\paramno by 1\relax + \fi + \next +} + +% Trailing missing arguments are set to empty. +% +\def\setemptyargvalues@{% + \ifx\paramlist\nilm@ + \let\next\macargexpandinbody@ + \else + \expandafter\setemptyargvaluesparser@\paramlist\endargs@ + \let\next\setemptyargvalues@ + \fi + \next +} + +\def\setemptyargvaluesparser@#1,#2\endargs@{% + \expandafter\def\expandafter\@tempa\expandafter{% + \expandafter\def\csname macarg.#1\endcsname{}}% + \push@\@tempa\macargdeflist@ + \def\paramlist{#2}% +} + +% #1 is the element target macro +% #2 is the list macro +% #3,#4\endargs@ is the list value +\def\pop@#1#2#3,#4\endargs@{% + \def#1{#3}% + \def#2{#4}% +} +\long\def\longpop@#1#2#3,#4\endargs@{% + \long\def#1{#3}% + \long\def#2{#4}% +} + + +%%%%%%%%%%%%%% End of code for > 10 arguments %%%%%%%%%%%%%%%%%% + + +% This defines a Texinfo @macro or @rmacro, called by \parsemacbody. +% \macrobody has the body of the macro in it, with placeholders for +% its parameters, looking like "\xeatspaces{\hash 1}". +% \paramno is the number of parameters +% \paramlist is a TeX parameter text, e.g. 
"#1,#2,#3," +% There are four cases: macros of zero, one, up to nine, and many arguments. +% \xdef is used so that macro definitions will survive the file +% they're defined in: @include reads the file inside a group. +% +\def\defmacro{% + \let\hash=##% convert placeholders to macro parameter chars + \ifnum\paramno=1 + \def\xeatspaces##1{##1}% + % This removes the pair of braces around the argument. We don't + % use \eatspaces, because this can cause ends of lines to be lost + % when the argument to \eatspaces is read, leading to line-based + % commands like "@itemize" not being read correctly. + \else + \let\xeatspaces\relax % suppress expansion + \fi + \ifcase\paramno + % 0 + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\scanmacro{\macrobody}}% + \or % 1 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup + \noexpand\braceorline + \expandafter\noexpand\csname\the\macname @@@\endcsname}% + \expandafter\xdef\csname\the\macname @@@\endcsname##1{% + \egroup + \noexpand\scanmacro{\macrobody}% + }% + \else % at most 9 + \ifnum\paramno<10\relax + % @MACNAME sets the context for reading the macro argument + % @MACNAME@@ gets the argument, processes backslashes and appends a + % comma. + % @MACNAME@@@ removes braces surrounding the argument list. + % @MACNAME@@@@ scans the macro body with arguments substituted. + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup + \noexpand\expandafter % This \expandafter skip any spaces after the + \noexpand\macroargctxt % macro before we change the catcode of space. 
+ \noexpand\expandafter + \expandafter\noexpand\csname\the\macname @@\endcsname}% + \expandafter\xdef\csname\the\macname @@\endcsname##1{% + \noexpand\passargtomacro + \expandafter\noexpand\csname\the\macname @@@\endcsname{##1,}}% + \expandafter\xdef\csname\the\macname @@@\endcsname##1{% + \expandafter\noexpand\csname\the\macname @@@@\endcsname ##1}% + \expandafter\expandafter + \expandafter\xdef + \expandafter\expandafter + \csname\the\macname @@@@\endcsname\paramlist{% + \egroup\noexpand\scanmacro{\macrobody}}% + \else % 10 or more: + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\getargvals@{\the\macname}{\argl}% + }% + \global\expandafter\let\csname mac.\the\macname .body\endcsname\macrobody + \global\expandafter\let\csname mac.\the\macname .recurse\endcsname\gobble + \fi + \fi} + +\catcode `\@\texiatcatcode\relax % end private-to-Texinfo catcodes + +\def\norecurse#1{\bgroup\cslet{#1}{macsave.#1}} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +{\catcode`\@=0 \catcode`\\=13 % We need to manipulate \ so use @ as escape +@catcode`@_=11 % private names +@catcode`@!=11 % used as argument separator + +% \passargtomacro#1#2 - +% Call #1 with a list of tokens #2, with any doubled backslashes in #2 +% compressed to one. +% +% This implementation works by expansion, and not execution (so we cannot use +% \def or similar). This reduces the risk of this failing in contexts where +% complete expansion is done with no execution (for example, in writing out to +% an auxiliary file for an index entry). +% +% State is kept in the input stream: the argument passed to +% @look_ahead, @gobble_and_check_finish and @add_segment is +% +% THE_MACRO ARG_RESULT ! {PENDING_BS} NEXT_TOKEN (... 
rest of input) +% +% where: +% THE_MACRO - name of the macro we want to call +% ARG_RESULT - argument list we build to pass to that macro +% PENDING_BS - either a backslash or nothing +% NEXT_TOKEN - used to look ahead in the input stream to see what's coming next + +@gdef@passargtomacro#1#2{% + @add_segment #1!{}@relax#2\@_finish\% +} +@gdef@_finish{@_finishx} @global@let@_finishx@relax + +% #1 - THE_MACRO ARG_RESULT +% #2 - PENDING_BS +% #3 - NEXT_TOKEN +% #4 used to look ahead +% +% If the next token is not a backslash, process the rest of the argument; +% otherwise, remove the next token. +@gdef@look_ahead#1!#2#3#4{% + @ifx#4\% + @expandafter@gobble_and_check_finish + @else + @expandafter@add_segment + @fi#1!{#2}#4#4% +} + +% #1 - THE_MACRO ARG_RESULT +% #2 - PENDING_BS +% #3 - NEXT_TOKEN +% #4 should be a backslash, which is gobbled. +% #5 looks ahead +% +% Double backslash found. Add a single backslash, and look ahead. +@gdef@gobble_and_check_finish#1!#2#3#4#5{% + @add_segment#1\!{}#5#5% +} + +@gdef@is_fi{@fi} + +% #1 - THE_MACRO ARG_RESULT +% #2 - PENDING_BS +% #3 - NEXT_TOKEN +% #4 is input stream until next backslash +% +% Input stream is either at the start of the argument, or just after a +% backslash sequence, either a lone backslash, or a doubled backslash. +% NEXT_TOKEN contains the first token in the input stream: if it is \finish, +% finish; otherwise, append to ARG_RESULT the segment of the argument up until +% the next backslash. PENDING_BACKSLASH contains a backslash to represent +% a backslash just before the start of the input stream that has not been +% added to ARG_RESULT. +@gdef@add_segment#1!#2#3#4\{% +@ifx#3@_finish + @call_the_macro#1!% +@else + % append the pending backslash to the result, followed by the next segment + @expandafter@is_fi@look_ahead#1#2#4!{\}@fi + % this @fi is discarded by @look_ahead. + % we can't get rid of it with \expandafter because we don't know how + % long #4 is. 
+} + +% #1 - THE_MACRO +% #2 - ARG_RESULT +% #3 discards the res of the conditional in @add_segment, and @is_fi ends the +% conditional. +@gdef@call_the_macro#1#2!#3@fi{@is_fi #1{#2}} + +} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% \braceorline MAC is used for a one-argument macro MAC. It checks +% whether the next non-whitespace character is a {. It sets the context +% for reading the argument (slightly different in the two cases). Then, +% to read the argument, in the whole-line case, it then calls the regular +% \parsearg MAC; in the lbrace case, it calls \passargtomacro MAC. +% +\def\braceorline#1{\let\macnamexxx=#1\futurelet\nchar\braceorlinexxx} +\def\braceorlinexxx{% + \ifx\nchar\bgroup + \macroargctxt + \expandafter\passargtomacro + \else + \macrolineargctxt\expandafter\parsearg + \fi \macnamexxx} + + +% @alias. +% We need some trickery to remove the optional spaces around the equal +% sign. Make them active and then expand them all to nothing. +% +\def\alias{\parseargusing\obeyspaces\aliasxxx} +\def\aliasxxx #1{\aliasyyy#1\relax} +\def\aliasyyy #1=#2\relax{% + {% + \expandafter\let\obeyedspace=\empty + \addtomacrolist{#1}% + \xdef\next{\global\let\makecsname{#1}=\makecsname{#2}}% + }% + \next +} + + +\message{cross references,} + +\newwrite\auxfile +\newif\ifhavexrefs % True if xref values are known. +\newif\ifwarnedxrefs % True if we warned once that they aren't known. + +% @inforef is relatively simple. +\def\inforef #1{\inforefzzz #1,,,,**} +\def\inforefzzz #1,#2,#3,#4**{% + \putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, + node \samp{\ignorespaces#1{}}} + +% @node's only job in TeX is to define \lastnode, which is used in +% cross-references. The @node line might or might not have commas, and +% might or might not have spaces before the first comma, like: +% @node foo , bar , ... +% We don't want such trailing spaces in the node name. 
+% +\parseargdef\node{\checkenv{}\donode #1 ,\finishnodeparse} +% +% also remove a trailing comma, in case of something like this: +% @node Help-Cross, , , Cross-refs +\def\donode#1 ,#2\finishnodeparse{\dodonode #1,\finishnodeparse} +\def\dodonode#1,#2\finishnodeparse{\gdef\lastnode{#1}} + +\let\nwnode=\node +\let\lastnode=\empty + +% Write a cross-reference definition for the current node. #1 is the +% type (Ynumbered, Yappendix, Ynothing). +% +\def\donoderef#1{% + \ifx\lastnode\empty\else + \setref{\lastnode}{#1}% + \global\let\lastnode=\empty + \fi +} + +% @anchor{NAME} -- define xref target at arbitrary point. +% +\newcount\savesfregister +% +\def\savesf{\relax \ifhmode \savesfregister=\spacefactor \fi} +\def\restoresf{\relax \ifhmode \spacefactor=\savesfregister \fi} +\def\anchor#1{\savesf \setref{#1}{Ynothing}\restoresf \ignorespaces} + +% \setref{NAME}{SNT} defines a cross-reference point NAME (a node or an +% anchor), which consists of three parts: +% 1) NAME-title - the current sectioning name taken from \lastsection, +% or the anchor name. +% 2) NAME-snt - section number and type, passed as the SNT arg, or +% empty for anchors. +% 3) NAME-pg - the page number. +% +% This is called from \donoderef, \anchor, and \dofloat. In the case of +% floats, there is an additional part, which is not written here: +% 4) NAME-lof - the text as it should appear in a @listoffloats. +% +\def\setref#1#2{% + \pdfmkdest{#1}% + \iflinks + {% + \requireauxfile + \atdummies % preserve commands, but don't expand them + % match definition in \xrdef, \refx, \xrefX. + \def\value##1{##1}% + \edef\writexrdef##1##2{% + \write\auxfile{@xrdef{#1-% #1 of \setref, expanded by the \edef + ##1}{##2}}% these are parameters of \writexrdef + }% + \toks0 = \expandafter{\lastsection}% + \immediate \writexrdef{title}{\the\toks0 }% + \immediate \writexrdef{snt}{\csname #2\endcsname}% \Ynumbered etc. 
+ \safewhatsit{\writexrdef{pg}{\folio}}% will be written later, at \shipout + }% + \fi +} + +% @xrefautomaticsectiontitle on|off says whether @section(ing) names are used +% automatically in xrefs, if the third arg is not explicitly specified. +% This was provided as a "secret" @set xref-automatic-section-title +% variable, now it's official. +% +\parseargdef\xrefautomaticsectiontitle{% + \def\temp{#1}% + \ifx\temp\onword + \expandafter\let\csname SETxref-automatic-section-title\endcsname + = \empty + \else\ifx\temp\offword + \expandafter\let\csname SETxref-automatic-section-title\endcsname + = \relax + \else + \errhelp = \EMsimple + \errmessage{Unknown @xrefautomaticsectiontitle value `\temp', + must be on|off}% + \fi\fi +} + +% +% @xref, @pxref, and @ref generate cross-references. For \xrefX, #1 is +% the node name, #2 the name of the Info cross-reference, #3 the printed +% node name, #4 the name of the Info file, #5 the name of the printed +% manual. All but the node name can be omitted. +% +\def\pxref{\putwordsee{} \xrefXX} +\def\xref{\putwordSee{} \xrefXX} +\def\ref{\xrefXX} + +\def\xrefXX#1{\def\xrefXXarg{#1}\futurelet\tokenafterxref\xrefXXX} +\def\xrefXXX{\expandafter\xrefX\expandafter[\xrefXXarg,,,,,,,]} +% +\newbox\toprefbox +\newbox\printedrefnamebox +\newbox\infofilenamebox +\newbox\printedmanualbox +% +\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup + \unsepspaces + % + % Get args without leading/trailing spaces. + \def\printedrefname{\ignorespaces #3}% + \setbox\printedrefnamebox = \hbox{\printedrefname\unskip}% + % + \def\infofilename{\ignorespaces #4}% + \setbox\infofilenamebox = \hbox{\infofilename\unskip}% + % + \def\printedmanual{\ignorespaces #5}% + \setbox\printedmanualbox = \hbox{\printedmanual\unskip}% + % + % If the printed reference name (arg #3) was not explicitly given in + % the @xref, figure out what we want to use. + \ifdim \wd\printedrefnamebox = 0pt + % No printed node name was explicitly given.
+ \expandafter\ifx\csname SETxref-automatic-section-title\endcsname \relax + % Not auto section-title: use node name inside the square brackets. + \def\printedrefname{\ignorespaces #1}% + \else + % Auto section-title: use chapter/section title inside + % the square brackets if we have it. + \ifdim \wd\printedmanualbox > 0pt + % It is in another manual, so we don't have it; use node name. + \def\printedrefname{\ignorespaces #1}% + \else + \ifhavexrefs + % We (should) know the real title if we have the xref values. + \def\printedrefname{\refx{#1-title}{}}% + \else + % Otherwise just copy the Info node name. + \def\printedrefname{\ignorespaces #1}% + \fi% + \fi + \fi + \fi + % + % Make link in pdf output. + \ifpdf + % For pdfTeX and LuaTeX + {\indexnofonts + \makevalueexpandable + \turnoffactive + % This expands tokens, so do it after making catcode changes, so _ + % etc. don't get their TeX definitions. This ignores all spaces in + % #4, including (wrongly) those in the middle of the filename. + \getfilename{#4}% + % + % This (wrongly) does not take account of leading or trailing + % spaces in #1, which should be ignored. + \setpdfdestname{#1}% + % + \ifx\pdfdestname\empty + \def\pdfdestname{Top}% no empty targets + \fi + % + \leavevmode + \startlink attr{/Border [0 0 0]}% + \ifnum\filenamelength>0 + goto file{\the\filename.pdf} name{\pdfdestname}% + \else + goto name{\pdfmkpgn{\pdfdestname}}% + \fi + }% + \setcolor{\linkcolor}% + \else + \ifx\XeTeXrevision\thisisundefined + \else + % For XeTeX + {\indexnofonts + \makevalueexpandable + \turnoffactive + % This expands tokens, so do it after making catcode changes, so _ + % etc. don't get their TeX definitions. This ignores all spaces in + % #4, including (wrongly) those in the middle of the filename. + \getfilename{#4}% + % + % This (wrongly) does not take account of leading or trailing + % spaces in #1, which should be ignored. 
+ \setpdfdestname{#1}% + % + \ifx\pdfdestname\empty + \def\pdfdestname{Top}% no empty targets + \fi + % + \leavevmode + \ifnum\filenamelength>0 + % With default settings, + % XeTeX (xdvipdfmx) replaces link destination names with integers. + % In this case, the replaced destination names of + % remote PDFs are no longer known. In order to avoid a replacement, + % you can use xdvipdfmx's command line option `-C 0x0010'. + % If you use XeTeX 0.99996+ (TeX Live 2016+), + % this command line option is no longer necessary + % because we can use the `dvipdfmx:config' special. + \special{pdf:bann << /Border [0 0 0] /Type /Annot /Subtype /Link /A + << /S /GoToR /F (\the\filename.pdf) /D (\pdfdestname) >> >>}% + \else + \special{pdf:bann << /Border [0 0 0] /Type /Annot /Subtype /Link /A + << /S /GoTo /D (\pdfdestname) >> >>}% + \fi + }% + \setcolor{\linkcolor}% + \fi + \fi + {% + % Have to otherify everything special to allow the \csname to + % include an _ in the xref name, etc. + \indexnofonts + \turnoffactive + \def\value##1{##1}% + \expandafter\global\expandafter\let\expandafter\Xthisreftitle + \csname XR#1-title\endcsname + }% + % + % Float references are printed completely differently: "Figure 1.2" + % instead of "[somenode], p.3". \iffloat distinguishes them by + % \Xthisreftitle being set to a magic string. + \iffloat\Xthisreftitle + % If the user specified the print name (third arg) to the ref, + % print it instead of our usual "Figure 1.2". + \ifdim\wd\printedrefnamebox = 0pt + \refx{#1-snt}{}% + \else + \printedrefname + \fi + % + % If the user also gave the printed manual name (fifth arg), append + % "in MANUALNAME". + \ifdim \wd\printedmanualbox > 0pt + \space \putwordin{} \cite{\printedmanual}% + \fi + \else + % node/anchor (non-float) references. + % + % If we use \unhbox to print the node names, TeX does not insert + % empty discretionaries after hyphens, which means that it will not + % find a line break at a hyphen in a node name.
Since some manuals + % are best written with fairly long node names, containing hyphens, + % this is a loss. Therefore, we give the text of the node name + % again, so it is as if TeX is seeing it for the first time. + % + \ifdim \wd\printedmanualbox > 0pt + % Cross-manual reference with a printed manual name. + % + \crossmanualxref{\cite{\printedmanual\unskip}}% + % + \else\ifdim \wd\infofilenamebox > 0pt + % Cross-manual reference with only an info filename (arg 4), no + % printed manual name (arg 5). This is essentially the same as + % the case above; we output the filename, since we have nothing else. + % + \crossmanualxref{\code{\infofilename\unskip}}% + % + \else + % Reference within this manual. + % + % _ (for example) has to be the character _ for the purposes of the + % control sequence corresponding to the node, but it has to expand + % into the usual \leavevmode...\vrule stuff for purposes of + % printing. So we \turnoffactive for the \refx-snt, back on for the + % printing, back off for the \refx-pg. + {\turnoffactive + % Only output a following space if the -snt ref is nonempty; for + % @unnumbered and @anchor, it won't be. + \setbox2 = \hbox{\ignorespaces \refx{#1-snt}{}}% + \ifdim \wd2 > 0pt \refx{#1-snt}\space\fi + }% + % output the `[mynode]' via the macro below so it can be overridden. + \xrefprintnodename\printedrefname + % + % But we always want a comma and a space: + ,\space + % + % output the `page 3'. + \turnoffactive \putwordpage\tie\refx{#1-pg}{}% + % Add a , if xref followed by a space + \if\space\noexpand\tokenafterxref ,% + \else\ifx\ \tokenafterxref ,% @TAB + \else\ifx\*\tokenafterxref ,% @* + \else\ifx\ \tokenafterxref ,% @SPACE + \else\ifx\ + \tokenafterxref ,% @NL + \else\ifx\tie\tokenafterxref ,% @tie + \fi\fi\fi\fi\fi\fi + \fi\fi + \fi + \endlink +\endgroup} + +% Output a cross-manual xref to #1. Used just above (twice). +% +% Only include the text "Section ``foo'' in" if the foo is neither +% missing nor Top.
Thus, @xref{,,,foo,The Foo Manual} outputs simply +% "see The Foo Manual", the idea being to refer to the whole manual. +% +% But, this being TeX, we can't easily compare our node name against the +% string "Top" while ignoring the possible spaces before and after in +% the input. By adding the arbitrary 7sp below, we make it much less +% likely that a real node name would have the same width as "Top" (e.g., +% in a monospaced font). Hopefully it will never happen in practice. +% +% For the same basic reason, we retypeset the "Top" at every +% reference, since the current font is indeterminate. +% +\def\crossmanualxref#1{% + \setbox\toprefbox = \hbox{Top\kern7sp}% + \setbox2 = \hbox{\ignorespaces \printedrefname \unskip \kern7sp}% + \ifdim \wd2 > 7sp % nonempty? + \ifdim \wd2 = \wd\toprefbox \else % same as Top? + \putwordSection{} ``\printedrefname'' \putwordin{}\space + \fi + \fi + #1% +} + +% This macro is called from \xrefX for the `[nodename]' part of xref +% output. It's a separate macro only so it can be changed more easily, +% since square brackets don't work well in some documents. Particularly +% one that Bob is working on :). +% +\def\xrefprintnodename#1{[#1]} + +% Things referred to by \setref. 
+% +\def\Ynothing{} +\def\Yomitfromtoc{} +\def\Ynumbered{% + \ifnum\secno=0 + \putwordChapter@tie \the\chapno + \else \ifnum\subsecno=0 + \putwordSection@tie \the\chapno.\the\secno + \else \ifnum\subsubsecno=0 + \putwordSection@tie \the\chapno.\the\secno.\the\subsecno + \else + \putwordSection@tie \the\chapno.\the\secno.\the\subsecno.\the\subsubsecno + \fi\fi\fi +} +\def\Yappendix{% + \ifnum\secno=0 + \putwordAppendix@tie @char\the\appendixno{}% + \else \ifnum\subsecno=0 + \putwordSection@tie @char\the\appendixno.\the\secno + \else \ifnum\subsubsecno=0 + \putwordSection@tie @char\the\appendixno.\the\secno.\the\subsecno + \else + \putwordSection@tie + @char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno + \fi\fi\fi +} + +% \refx{NAME}{SUFFIX} - reference a cross-reference string named NAME. SUFFIX +% is output afterwards if non-empty. +\def\refx#1#2{% + \requireauxfile + {% + \indexnofonts + \otherbackslash + \def\value##1{##1}% + \expandafter\global\expandafter\let\expandafter\thisrefX + \csname XR#1\endcsname + }% + \ifx\thisrefX\relax + % If not defined, say something at least. + \angleleft un\-de\-fined\angleright + \iflinks + \ifhavexrefs + {\toks0 = {#1}% avoid expansion of possibly-complex value + \message{\linenumber Undefined cross reference `\the\toks0'.}}% + \else + \ifwarnedxrefs\else + \global\warnedxrefstrue + \message{Cross reference values unknown; you must run TeX again.}% + \fi + \fi + \fi + \else + % It's defined, so just use it. + \thisrefX + \fi + #2% Output the suffix in any case. +} + +% This is the macro invoked by entries in the aux file. Define a control +% sequence for a cross-reference target (we prepend XR to the control sequence +% name to avoid collisions). The value is the page number. If this is a float +% type, we have more work to do. +% +\def\xrdef#1#2{% + {% Expand the node or anchor name to remove control sequences. + % \turnoffactive stops 8-bit characters being changed to commands + % like @'e. 
\refx does the same to retrieve the value in the definition. + \indexnofonts + \turnoffactive + \def\value##1{##1}% + \xdef\safexrefname{#1}% + }% + % + \expandafter\gdef\csname XR\safexrefname\endcsname{#2}% remember this xref + % + % Was that xref control sequence that we just defined for a float? + \expandafter\iffloat\csname XR\safexrefname\endcsname + % it was a float, and we have the (safe) float type in \iffloattype. + \expandafter\let\expandafter\floatlist + \csname floatlist\iffloattype\endcsname + % + % Is this the first time we've seen this float type? + \expandafter\ifx\floatlist\relax + \toks0 = {\do}% yes, so just \do + \else + % had it before, so preserve previous elements in list. + \toks0 = \expandafter{\floatlist\do}% + \fi + % + % Remember this xref in the control sequence \floatlistFLOATTYPE, + % for later use in \listoffloats. + \expandafter\xdef\csname floatlist\iffloattype\endcsname{\the\toks0 + {\safexrefname}}% + \fi +} + +% If working on a large document in chapters, it is convenient to +% be able to disable indexing, cross-referencing, and contents, for test runs. +% This is done with @novalidate at the beginning of the file. +% +\newif\iflinks \linkstrue % by default we want the aux files. +\let\novalidate = \linksfalse + +% Used when writing to the aux file, or when using data from it. +\def\requireauxfile{% + \iflinks + \tryauxfile + % Open the new aux file. TeX will close it automatically at exit. + \immediate\openout\auxfile=\jobname.aux + \fi + \global\let\requireauxfile=\relax % Only do this once. +} + +% Read the last existing aux file, if any. No error if none exists. 
+% +\def\tryauxfile{% + \openin 1 \jobname.aux + \ifeof 1 \else + \readdatafile{aux}% + \global\havexrefstrue + \fi + \closein 1 +} + +\def\setupdatafile{% + \catcode`\^^@=\other + \catcode`\^^A=\other + \catcode`\^^B=\other + \catcode`\^^C=\other + \catcode`\^^D=\other + \catcode`\^^E=\other + \catcode`\^^F=\other + \catcode`\^^G=\other + \catcode`\^^H=\other + \catcode`\^^K=\other + \catcode`\^^L=\other + \catcode`\^^N=\other + \catcode`\^^P=\other + \catcode`\^^Q=\other + \catcode`\^^R=\other + \catcode`\^^S=\other + \catcode`\^^T=\other + \catcode`\^^U=\other + \catcode`\^^V=\other + \catcode`\^^W=\other + \catcode`\^^X=\other + \catcode`\^^Z=\other + \catcode`\^^[=\other + \catcode`\^^\=\other + \catcode`\^^]=\other + \catcode`\^^^=\other + \catcode`\^^_=\other + % It was suggested to set the catcode of ^ to 7, which would allow ^^e4 etc. + % in xref tags, i.e., node names. But since ^^e4 notation isn't + % supported in the main text, it doesn't seem desirable. Furthermore, + % that is not enough: for node names that actually contain a ^ + % character, we would end up writing a line like this: 'xrdef {'hat + % b-title}{'hat b} and \xrdef does a \csname...\endcsname on the first + % argument, and \hat is not an expandable control sequence. It could + % all be worked out, but why? Either we support ^^ or we don't. + % + % The other change necessary for this was to define \auxhat: + % \def\auxhat{\def^{'hat }}% extra space so ok if followed by letter + % and then to call \auxhat in \setq. + % + \catcode`\^=\other + % + % Special characters. Should be turned off anyway, but... 
+ \catcode`\~=\other + \catcode`\[=\other + \catcode`\]=\other + \catcode`\"=\other + \catcode`\_=\other + \catcode`\|=\other + \catcode`\<=\other + \catcode`\>=\other + \catcode`\$=\other + \catcode`\#=\other + \catcode`\&=\other + \catcode`\%=\other + \catcode`+=\other % avoid \+ for paranoia even though we've turned it off + % + % This is to support \ in node names and titles, since the \ + % characters end up in a \csname. It's easier than + % leaving it active and making its active definition an actual \ + % character. What I don't understand is why it works in the *value* + % of the xrdef. Seems like it should be a catcode12 \, and that + % should not typeset properly. But it works, so I'm moving on for + % now. --karl, 15jan04. + \catcode`\\=\other + % + % @ is our escape character in .aux files, and we need braces. + \catcode`\{=1 + \catcode`\}=2 + \catcode`\@=0 +} + +\def\readdatafile#1{% +\begingroup + \setupdatafile + \input\jobname.#1 +\endgroup} + + +\message{insertions,} +% including footnotes. + +\newcount \footnoteno + +% The trailing space in the following definition for supereject is +% vital for proper filling; pages come out unaligned when you do a +% pagealignmacro call if that space before the closing brace is +% removed. (Generally, numeric constants should always be followed by a +% space to prevent strange expansion errors.) +\def\supereject{\par\penalty -20000\footnoteno =0 } + +% @footnotestyle is meaningful for Info output only. +\let\footnotestyle=\comment + +{\catcode `\@=11 +% +% Auto-number footnotes. Otherwise like plain. +\gdef\footnote{% + \global\advance\footnoteno by \@ne + \edef\thisfootno{$^{\the\footnoteno}$}% + % + % In case the footnote comes at the end of a sentence, preserve the + % extra spacing after we do the footnote number. + \let\@sf\empty + \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\ptexslash\fi + % + % Remove inadvertent blank space before typesetting the footnote number. 
+ \unskip + \thisfootno\@sf + \dofootnote +}% + +% Don't bother with the trickery in plain.tex to not require the +% footnote text as a parameter. Our footnotes don't need to be so general. +% +% Oh yes, they do; otherwise, @ifset (and anything else that uses +% \parseargline) fails inside footnotes because the tokens are fixed when +% the footnote is read. --karl, 16nov96. +% +\gdef\dofootnote{% + \insert\footins\bgroup + % + % Nested footnotes are not supported in TeX, that would take a lot + % more work. (\startsavinginserts does not suffice.) + \let\footnote=\errfootnotenest + % + % We want to typeset this text as a normal paragraph, even if the + % footnote reference occurs in (for example) a display environment. + % So reset some parameters. + \hsize=\txipagewidth + \interlinepenalty\interfootnotelinepenalty + \splittopskip\ht\strutbox % top baseline for broken footnotes + \splitmaxdepth\dp\strutbox + \floatingpenalty\@MM + \leftskip\z@skip + \rightskip\z@skip + \spaceskip\z@skip + \xspaceskip\z@skip + \parindent\defaultparindent + % + \smallfonts \rm + % + % Because we use hanging indentation in footnotes, a @noindent appears + % to exdent this text, so make it be a no-op. makeinfo does not use + % hanging indentation so @noindent can still be needed within footnote + % text after an @example or the like (not that this is good style). + \let\noindent = \relax + % + % Hang the footnote text off the number. Use \everypar in case the + % footnote extends for more than one paragraph. + \everypar = {\hang}% + \textindent{\thisfootno}% + % + % Don't crash into the line above the footnote text. Since this + % expands into a box, it must come within the paragraph, lest it + % provide a place where TeX can split the footnote. + \footstrut + % + % Invoke rest of plain TeX footnote routine. 
+ \futurelet\next\fo@t +} +}%end \catcode `\@=11 + +\def\errfootnotenest{% + \errhelp=\EMsimple + \errmessage{Nested footnotes not supported in texinfo.tex, + even though they work in makeinfo; sorry} +} + +\def\errfootnoteheading{% + \errhelp=\EMsimple + \errmessage{Footnotes in chapters, sections, etc., are not supported} +} + +% In case a @footnote appears in a vbox, save the footnote text and create +% the real \insert just after the vbox finished. Otherwise, the insertion +% would be lost. +% Similarly, if a @footnote appears inside an alignment, save the footnote +% text to a box and make the \insert when a row of the table is finished. +% And the same can be done for other insert classes. --kasal, 16nov03. +% +% Replace the \insert primitive by a cheating macro. +% Deeper inside, just make sure that the saved insertions are not spilled +% out prematurely. +% +\def\startsavinginserts{% + \ifx \insert\ptexinsert + \let\insert\saveinsert + \else + \let\checkinserts\relax + \fi +} + +% This \insert replacement works for both \insert\footins{foo} and +% \insert\footins\bgroup foo\egroup, but it doesn't work for \insert27{foo}. 
+% +\def\saveinsert#1{% + \edef\next{\noexpand\savetobox \makeSAVEname#1}% + \afterassignment\next + % swallow the left brace + \let\temp = +} +\def\makeSAVEname#1{\makecsname{SAVE\expandafter\gobble\string#1}} +\def\savetobox#1{\global\setbox#1 = \vbox\bgroup \unvbox#1} + +\def\checksaveins#1{\ifvoid#1\else \placesaveins#1\fi} + +\def\placesaveins#1{% + \ptexinsert \csname\expandafter\gobblesave\string#1\endcsname + {\box#1}% +} + +% eat @SAVE -- beware, all of them have catcode \other: +{ + \def\dospecials{\do S\do A\do V\do E} \uncatcodespecials % ;-) + \gdef\gobblesave @SAVE{} +} + +% initialization: +\def\newsaveins #1{% + \edef\next{\noexpand\newsaveinsX \makeSAVEname#1}% + \next +} +\def\newsaveinsX #1{% + \csname newbox\endcsname #1% + \expandafter\def\expandafter\checkinserts\expandafter{\checkinserts + \checksaveins #1}% +} + +% initialize: +\let\checkinserts\empty +\newsaveins\footins +\newsaveins\margin + + +% @image. We use the macros from epsf.tex to support this. +% If epsf.tex is not installed and @image is used, we complain. +% +% Check for and read epsf.tex up front. If we read it only at @image +% time, we might be inside a group, and then its definitions would get +% undone and the next image would fail. +\openin 1 = epsf.tex +\ifeof 1 \else + % Do not bother showing banner with epsf.tex v2.7k (available in + % doc/epsf.tex and on ctan). + \def\epsfannounce{\toks0 = }% + \input epsf.tex +\fi +\closein 1 +% +% We will only complain once about lack of epsf.tex. +\newif\ifwarnednoepsf +\newhelp\noepsfhelp{epsf.tex must be installed for images to + work. 
It is also included in the Texinfo distribution, or you can get + it from ftp://tug.org/tex/epsf.tex.} +% +\def\image#1{% + \ifx\epsfbox\thisisundefined + \ifwarnednoepsf \else + \errhelp = \noepsfhelp + \errmessage{epsf.tex not found, images will be ignored}% + \global\warnednoepsftrue + \fi + \else + \imagexxx #1,,,,,\finish + \fi +} +% +% Arguments to @image: +% #1 is (mandatory) image filename; we tack on .eps extension. +% #2 is (optional) width, #3 is (optional) height. +% #4 is (ignored optional) html alt text. +% #5 is (ignored optional) extension. +% #6 is just the usual extra ignored arg for parsing stuff. +\newif\ifimagevmode +\def\imagexxx#1,#2,#3,#4,#5,#6\finish{\begingroup + \catcode`\^^M = 5 % in case we're inside an example + \normalturnoffactive % allow _ et al. in names + \def\xprocessmacroarg{\eatspaces}% in case we are being used via a macro + % If the image is by itself, center it. + \ifvmode + \imagevmodetrue + \else \ifx\centersub\centerV + % for @center @image, we need a vbox so we can have our vertical space + \imagevmodetrue + \vbox\bgroup % vbox has better behavior than vtop here + \fi\fi + % + \ifimagevmode + \nobreak\medskip + % Usually we'll have text after the image which will insert + % \parskip glue, so insert it here too to equalize the space + % above and below. + \nobreak\vskip\parskip + \nobreak + \fi + % + % Leave vertical mode so that indentation from an enclosing + % environment such as @quotation is respected. + % However, if we're at the top level, we don't want the + % normal paragraph indentation. + % On the other hand, if we are in the case of @center @image, we don't + % want to start a paragraph, which will create a hsize-width box and + % eradicate the centering. + \ifx\centersub\centerV\else \noindent \fi + % + % Output the image. + \ifpdf + % For pdfTeX and LuaTeX <= 0.80 + \dopdfimage{#1}{#2}{#3}% + \else + \ifx\XeTeXrevision\thisisundefined + % For epsf.tex + % \epsfbox itself resets \epsf?size at each figure.
+ \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi + \setbox0 = \hbox{\ignorespaces #3}% + \ifdim\wd0 > 0pt \epsfysize=#3\relax \fi + \epsfbox{#1.eps}% + \else + % For XeTeX + \doxeteximage{#1}{#2}{#3}% + \fi + \fi + % + \ifimagevmode + \medskip % space after a standalone image + \fi + \ifx\centersub\centerV \egroup \fi +\endgroup} + + +% @float FLOATTYPE,LABEL,LOC ... @end float for displayed figures, tables, +% etc. We don't actually implement floating yet, we always include the +% float "here". But it seemed the best name for the future. +% +\envparseargdef\float{\eatcommaspace\eatcommaspace\dofloat#1, , ,\finish} + +% There may be a space before second and/or third parameter; delete it. +\def\eatcommaspace#1, {#1,} + +% #1 is the optional FLOATTYPE, the text label for this float, typically +% "Figure", "Table", "Example", etc. Can't contain commas. If omitted, +% this float will not be numbered and cannot be referred to. +% +% #2 is the optional xref label. Also must be present for the float to +% be referable. +% +% #3 is the optional positioning argument; for now, it is ignored. It +% will somehow specify the positions allowed to float to (here, top, bottom). +% +% We keep a separate counter for each FLOATTYPE, which we reset at each +% chapter-level command. +\let\resetallfloatnos=\empty +% +\def\dofloat#1,#2,#3,#4\finish{% + \let\thiscaption=\empty + \let\thisshortcaption=\empty + % + % don't lose footnotes inside @float. + % + % BEWARE: when the floats start float, we have to issue warning whenever an + % insert appears inside a float which could possibly float. --kasal, 26may04 + % + \startsavinginserts + % + % We can't be used inside a paragraph. + \par + % + \vtop\bgroup + \def\floattype{#1}% + \def\floatlabel{#2}% + \def\floatloc{#3}% we do nothing with this yet. 
+ % + \ifx\floattype\empty + \let\safefloattype=\empty + \else + {% + % the floattype might have accents or other special characters, + % but we need to use it in a control sequence name. + \indexnofonts + \turnoffactive + \xdef\safefloattype{\floattype}% + }% + \fi + % + % If label is given but no type, we handle that as the empty type. + \ifx\floatlabel\empty \else + % We want each FLOATTYPE to be numbered separately (Figure 1, + % Table 1, Figure 2, ...). (And if no label, no number.) + % + \expandafter\getfloatno\csname\safefloattype floatno\endcsname + \global\advance\floatno by 1 + % + {% + % This magic value for \lastsection is output by \setref as the + % XREFLABEL-title value. \xrefX uses it to distinguish float + % labels (which have a completely different output format) from + % node and anchor labels. And \xrdef uses it to construct the + % lists of floats. + % + \edef\lastsection{\floatmagic=\safefloattype}% + \setref{\floatlabel}{Yfloat}% + }% + \fi + % + % start with \parskip glue, I guess. + \vskip\parskip + % + % Don't suppress indentation if a float happens to start a section. + \restorefirstparagraphindent +} + +% we have these possibilities: +% @float Foo,lbl & @caption{Cap}: Foo 1.1: Cap +% @float Foo,lbl & no caption: Foo 1.1 +% @float Foo & @caption{Cap}: Foo: Cap +% @float Foo & no caption: Foo +% @float ,lbl & @caption{Cap}: 1.1: Cap +% @float ,lbl & no caption: 1.1 +% @float & @caption{Cap}: Cap +% @float & no caption: +% +\def\Efloat{% + \let\floatident = \empty + % + % In all cases, if we have a float type, it comes first. + \ifx\floattype\empty \else \def\floatident{\floattype}\fi + % + % If we have an xref label, the number comes next. + \ifx\floatlabel\empty \else + \ifx\floattype\empty \else % if also had float type, need tie first. + \appendtomacro\floatident{\tie}% + \fi + % the number.
+ \appendtomacro\floatident{\chaplevelprefix\the\floatno}% + \fi + % + % Start the printed caption with what we've constructed in + % \floatident, but keep it separate; we need \floatident again. + \let\captionline = \floatident + % + \ifx\thiscaption\empty \else + \ifx\floatident\empty \else + \appendtomacro\captionline{: }% had ident, so need a colon between + \fi + % + % caption text. + \appendtomacro\captionline{\scanexp\thiscaption}% + \fi + % + % If we have anything to print, print it, with space before. + % Eventually this needs to become an \insert. + \ifx\captionline\empty \else + \vskip.5\parskip + \captionline + % + % Space below caption. + \vskip\parskip + \fi + % + % If have an xref label, write the list of floats info. Do this + % after the caption, to avoid chance of it being a breakpoint. + \ifx\floatlabel\empty \else + % Write the text that goes in the lof to the aux file as + % \floatlabel-lof. Besides \floatident, we include the short + % caption if specified, else the full caption if specified, else nothing. + {% + \requireauxfile + \atdummies + % + \ifx\thisshortcaption\empty + \def\gtemp{\thiscaption}% + \else + \def\gtemp{\thisshortcaption}% + \fi + \immediate\write\auxfile{@xrdef{\floatlabel-lof}{\floatident + \ifx\gtemp\empty \else : \gtemp \fi}}% + }% + \fi + \egroup % end of \vtop + % + \checkinserts +} + +% Append the tokens #2 to the definition of macro #1, not expanding either. +% +\def\appendtomacro#1#2{% + \expandafter\def\expandafter#1\expandafter{#1#2}% +} + +% @caption, @shortcaption +% +\def\caption{\docaption\thiscaption} +\def\shortcaption{\docaption\thisshortcaption} +\def\docaption{\checkenv\float \bgroup\scanargctxt\defcaption} +\def\defcaption#1#2{\egroup \def#1{#2}} + +% The parameter is the control sequence identifying the counter we are +% going to use. Create it if it doesn't exist and assign it to \floatno. +\def\getfloatno#1{% + \ifx#1\relax + % Haven't seen this figure type before. 
+ \csname newcount\endcsname #1% + % + % Remember to reset this floatno at the next chap. + \expandafter\gdef\expandafter\resetallfloatnos + \expandafter{\resetallfloatnos #1=0 }% + \fi + \let\floatno#1% +} + +% \setref calls this to get the XREFLABEL-snt value. We want an @xref +% to the FLOATLABEL to expand to "Figure 3.1". We call \setref when we +% first read the @float command. +% +\def\Yfloat{\floattype@tie \chaplevelprefix\the\floatno}% + +% Magic string used for the XREFLABEL-title value, so \xrefX can +% distinguish floats from other xref types. +\def\floatmagic{!!float!!} + +% #1 is the control sequence we are passed; we expand into a conditional +% which is true if #1 represents a float ref. That is, the magic +% \lastsection value which we \setref above. +% +\def\iffloat#1{\expandafter\doiffloat#1==\finish} +% +% #1 is (maybe) the \floatmagic string. If so, #2 will be the +% (safe) float type for this float. We set \iffloattype to #2. +% +\def\doiffloat#1=#2=#3\finish{% + \def\temp{#1}% + \def\iffloattype{#2}% + \ifx\temp\floatmagic +} + +% @listoffloats FLOATTYPE - print a list of floats like a table of contents. +% +\parseargdef\listoffloats{% + \def\floattype{#1}% floattype + {% + % the floattype might have accents or other special characters, + % but we need to use it in a control sequence name. + \indexnofonts + \turnoffactive + \xdef\safefloattype{\floattype}% + }% + % + % \xrdef saves the floats as a \do-list in \floatlistSAFEFLOATTYPE. + \expandafter\ifx\csname floatlist\safefloattype\endcsname \relax + \ifhavexrefs + % if the user said @listoffloats foo but never @float foo. + \message{\linenumber No `\safefloattype' floats to list.}% + \fi + \else + \begingroup + \leftskip=\tocindent % indent these entries like a toc + \let\do=\listoffloatsdo + \csname floatlist\safefloattype\endcsname + \endgroup + \fi +} + +% This is called on each entry in a list of floats. 
We're passed the
+% xref label, in the form LABEL-title, which is how we save it in the
+% aux file. We strip off the -title and look up \XRLABEL-lof, which
+% has the text we're supposed to typeset here.
+%
+% Figures without xref labels will not be included in the list (since
+% they won't appear in the aux file).
+%
+% \listoffloatsdo appends \finish to its argument so that the delimited
+% parameter text of \listoffloatsdoentry (#1-title\finish) can split the
+% label proper (#1) from the trailing "-title".
+%
+\def\listoffloatsdo#1{\listoffloatsdoentry#1\finish}
+\def\listoffloatsdoentry#1-title\finish{{%
+  % The inner brace pair is a group: the assignments to \toksA and
+  % \writeentry below are undone when it closes.
+  %
+  % Can't fully expand XR#1-lof because it can contain anything. Just
+  % pass the control sequence. On the other hand, XR#1-pg is just the
+  % page number, and we want to fully expand that so we can get a link
+  % in pdf output.
+  \toksA = \expandafter{\csname XR#1-lof\endcsname}%
+  %
+  % use the same \entry macro we use to generate the TOC and index.
+  \edef\writeentry{\noexpand\entry{\the\toksA}{\csname XR#1-pg\endcsname}}%
+  \writeentry
+}}
+
+
+\message{localization,}
+
+% For single-language documents, @documentlanguage is usually given very
+% early, just after @documentencoding. Single argument is the language
+% (de) or locale (de_DE) abbreviation.
+%
+% The two macros below are defined inside a group in which _ is an
+% active character, so the _ tokens in their bodies and parameter text
+% are active ones. \globaldefs=1 makes every assignment in the group
+% global, so the definitions are still there after the closing brace.
+%
+{
+  \catcode`\_ = \active
+  \globaldefs=1
+\parseargdef\documentlanguage{%
+  \tex % read txi-??.tex file in plain TeX.
+    % Read the file by the name they passed if it exists.
+    \let_ = \normalunderscore % normal _ character for filename test
+    \openin 1 txi-#1.tex
+    \ifeof 1
+      % Not found under the full name. The extra _ appended here
+      % guarantees that the #1_#2\finish parameter pattern of the helper
+      % matches even when the argument contains no underscore.
+      \documentlanguagetrywithoutunderscore #1_\finish
+    \else
+      \globaldefs = 1 % everything in the txi-LL files needs to persist
+      \input txi-#1.tex
+    \fi
+    \closein 1
+  \endgroup % end raw TeX
+}
+%
+% If they passed de_DE, and txi-de_DE.tex doesn't exist,
+% try txi-de.tex.
+%
+% #1 is the text before the first underscore; #2 (whatever follows it,
+% up to \finish) is not used. If txi-#1.tex cannot be read either, an
+% error is raised with \nolanghelp as its help text.
+%
+\gdef\documentlanguagetrywithoutunderscore#1_#2\finish{%
+  \openin 1 txi-#1.tex
+  \ifeof 1
+    \errhelp = \nolanghelp
+    \errmessage{Cannot read language file txi-#1.tex}%
+  \else
+    \globaldefs = 1 % everything in the txi-LL files needs to persist
+    \input txi-#1.tex
+  \fi
+  \closein 1
+}
+}% end of special _ catcode
+%
+\newhelp\nolanghelp{The given language definition file cannot be found or
+is empty. Maybe you need to install it? Putting it in the current
+directory should work if nowhere else does.}
+
+% This macro is called from txi-??.tex files; the first argument is the
+% \language name to set (without the "\lang@" prefix), the second and
+% third args are \{left,right}hyphenmin.
+%
+% The language names to pass are determined when the format is built.
+% See the etex.log file created at that time, e.g.,
+% /usr/local/texlive/2008/texmf-var/web2c/pdftex/etex.log.
+%
+% With TeX Live 2008, etex now includes hyphenation patterns for all
+% available languages. This means we can support hyphenation in
+% Texinfo, at least to some extent. (This still doesn't solve the
+% accented characters problem.)
+%
+% All three settings are made with \global. Note that the \catcode
+% assignment below gives @ catcode 11 (letter) from this point on.
+%
+\catcode`@=11
+\def\txisetlanguage#1#2#3{%
+  % do not set the language if the name is undefined in the current TeX.
+  \expandafter\ifx\csname lang@#1\endcsname \relax
+    \message{no patterns for #1}%
+  \else
+    \global\language = \csname lang@#1\endcsname
+  \fi
+  % but there is no harm in adjusting the hyphenmin values regardless.
+  \global\lefthyphenmin = #2\relax
+  \global\righthyphenmin = #3\relax
+}
+
+% XeTeX and LuaTeX can handle Unicode natively.
+% Their default I/O uses UTF-8 sequences instead of a byte-wise operation.
+% Other TeX engines' I/O (pdfTeX, etc.) is byte-wise.
+%
+% \iftxinativeunicodecapable is true under XeTeX and LuaTeX.
+% \iftxiusebytewiseio is true when input is handled byte by byte:
+% initially on every other engine, and on XeTeX/LuaTeX once
+% \setbytewiseio has been called.
+%
+\newif\iftxinativeunicodecapable
+\newif\iftxiusebytewiseio
+
+% An engine is recognized by its version primitive being defined:
+% \XeTeXrevision for XeTeX, \luatexversion for LuaTeX.
+\ifx\XeTeXrevision\thisisundefined
+  \ifx\luatexversion\thisisundefined
+    \txinativeunicodecapablefalse
+    \txiusebytewiseiotrue
+  \else
+    \txinativeunicodecapabletrue
+    \txiusebytewiseiofalse
+  \fi
+\else
+  \txinativeunicodecapabletrue
+  \txiusebytewiseiofalse
+\fi
+
+% Set I/O by bytes instead of UTF-8 sequence for XeTeX and LuaTeX
+% for non-UTF-8 (byte-wise) encodings.
+%
+% Does nothing engine-specific on other engines; in every case it ends
+% by setting \iftxiusebytewiseio to true.
+%
+\def\setbytewiseio{%
+  \ifx\XeTeXrevision\thisisundefined
+  \else
+    \XeTeXdefaultencoding "bytes" % For subsequent files to be read
+    \XeTeXinputencoding "bytes" % For document root file
+    % Unfortunately, there seems to be no corresponding XeTeX command for
+    % output encoding. This is a problem for auxiliary index and TOC files.
+    % The only solution would be perhaps to write out @U{...} sequences in
+    % place of non-ASCII characters.
+  \fi
+
+  \ifx\luatexversion\thisisundefined
+  \else
+    % Two Lua callbacks are registered here. On input
+    % (process_input_buffer) every byte of a line is replaced by the
+    % UTF-8 encoding of the code point with that byte's value. On output
+    % (process_output_buffer) every code point of a line is written back
+    % as the single byte with that value.
+    \directlua{
+    local utf8_char, byte, gsub = unicode.utf8.char, string.byte, string.gsub
+    local function convert_char (char)
+      return utf8_char(byte(char))
+    end
+
+    local function convert_line (line)
+      return gsub(line, ".", convert_char)
+    end
+
+    callback.register("process_input_buffer", convert_line)
+
+    local function convert_line_out (line)
+      local line_out = ""
+      for c in string.utfvalues(line) do
+        line_out = line_out .. string.char(c)
+      end
+      return line_out
+    end
+
+    callback.register("process_output_buffer", convert_line_out)
+    }
+  \fi
+
+  \txiusebytewiseiotrue
+}
+
+
+% Helpers for encodings.
+% Set the catcode of characters 128 through 255 to the specified number.
+%
+% #1 is the category code to assign. \count255 is used as the loop
+% counter. This variant makes the assignments with \global.
+%
+\def\setnonasciicharscatcode#1{%
+  \count255=128
+  \loop\ifnum\count255<256
+    \global\catcode\count255=#1\relax
+    \advance\count255 by 1
+  \repeat
+}
+
+% Same loop as \setnonasciicharscatcode, but the \catcode assignments
+% are made without \global.
+\def\setnonasciicharscatcodenonglobal#1{%
+  \count255=128
+  \loop\ifnum\count255<256
+    \catcode\count255=#1\relax
+    \advance\count255 by 1
+  \repeat
+}
+
+% @documentencoding sets the definition of non-ASCII characters
+% according to the specified encoding.
+%
+% \documentencodingzzz receives the encoding name as #1. Recognized
+% names are US-ASCII, ISO-8859-1, ISO-8859-2, ISO-8859-15 and UTF-8;
+% anything else only produces an "Ignoring unknown document encoding"
+% message.
+%
+\def\documentencoding{\parseargusing\filenamecatcodes\documentencodingzzz}
+\def\documentencodingzzz#1{%
+  %
+  % Encoding being declared for the document.
+  \def\declaredencoding{\csname #1.enc\endcsname}%
+  %
+  % Supported encodings: names converted to tokens in order to be able
+  % to compare them with \ifx.
+  \def\ascii{\csname US-ASCII.enc\endcsname}%
+  \def\latnine{\csname ISO-8859-15.enc\endcsname}%
+  \def\latone{\csname ISO-8859-1.enc\endcsname}%
+  \def\lattwo{\csname ISO-8859-2.enc\endcsname}%
+  \def\utfeight{\csname UTF-8.enc\endcsname}%
+  %
+  \ifx \declaredencoding \ascii
+    \asciichardefs
+  %
+  % Each of the three Latin encodings is handled the same way: switch a
+  % natively Unicode engine to byte-wise I/O, make bytes 128-255 active,
+  % then install the definitions for that encoding.
+  \else \ifx \declaredencoding \lattwo
+    \iftxinativeunicodecapable
+      \setbytewiseio
+    \fi
+    \setnonasciicharscatcode\active
+    \lattwochardefs
+  %
+  \else \ifx \declaredencoding \latone
+    \iftxinativeunicodecapable
+      \setbytewiseio
+    \fi
+    \setnonasciicharscatcode\active
+    \latonechardefs
+  %
+  \else \ifx \declaredencoding \latnine
+    \iftxinativeunicodecapable
+      \setbytewiseio
+    \fi
+    \setnonasciicharscatcode\active
+    \latninechardefs
+  %
+  \else \ifx \declaredencoding \utfeight
+    \iftxinativeunicodecapable
+      % For native Unicode handling (XeTeX and LuaTeX)
+      \nativeunicodechardefs
+    \else
+      % For treating UTF-8 as byte sequences (TeX, eTeX and pdfTeX)
+      \setnonasciicharscatcode\active
+      % since we already invoked \utfeightchardefs at the top level
+      % (below), do not re-invoke it, otherwise our check for duplicated
+      % definitions gets triggered. Making non-ascii chars active is
+      % sufficient.
+    \fi
+  %
+  \else
+    \message{Ignoring unknown document encoding: #1.}%
+  %
+  \fi % utfeight
+  \fi % latnine
+  \fi % latone
+  \fi % lattwo
+  \fi % ascii
+  %
+  % Under XeTeX, warn for every declared encoding other than UTF-8 and
+  % US-ASCII.
+  \ifx\XeTeXrevision\thisisundefined
+  \else
+    \ifx \declaredencoding \utfeight
+    \else
+      \ifx \declaredencoding \ascii
+      \else
+        \message{Warning: XeTeX with non-UTF-8 encodings cannot handle %
+        non-ASCII characters in auxiliary files.}%
+      \fi
+    \fi
+  \fi
+}
+
+% emacs-page
+% A message to be logged when using a character that isn't available
+% in the default font encoding (OT1).
+%
+\def\missingcharmsg#1{\message{Character missing, sorry: #1.}}
+
+% Take account of \c (plain) vs. \, (Texinfo) difference.
+% If \c currently has the meaning saved in \ptexc it is used for the
+% cedilla, otherwise \, is used.
+\def\cedilla#1{\ifx\c\ptexc\c{#1}\else\,{#1}\fi}
+
+% First, make active non-ASCII characters in order for them to be
+% correctly categorized when TeX reads the replacement text of
+% macros containing the character definitions.
+\setnonasciicharscatcode\active
+%
+
+% \gdefchar CHAR{DEFINITION} globally defines CHAR (#1) so that it
+% yields itself as a plain character (\string#1) while
+% \ifpassthroughchars is true, and DEFINITION (#2) otherwise.
+\def\gdefchar#1#2{%
+\gdef#1{%
+  \ifpassthroughchars
+    \string#1%
+  \else
+    #2%
+  \fi
+}}
+
+% Latin1 (ISO-8859-1) character definitions.
+% Each \gdefchar line gives one byte, written in ^^xx notation, its
+% global definition; the table covers ^^a0 through ^^ff.
+\def\latonechardefs{%
+  \gdefchar^^a0{\tie}
+  \gdefchar^^a1{\exclamdown}
+  \gdefchar^^a2{{\tcfont \char162}} % cent
+  \gdefchar^^a3{\pounds{}}
+  \gdefchar^^a4{{\tcfont \char164}} % currency
+  \gdefchar^^a5{{\tcfont \char165}} % yen
+  \gdefchar^^a6{{\tcfont \char166}} % broken bar
+  \gdefchar^^a7{\S}
+  \gdefchar^^a8{\"{}}
+  \gdefchar^^a9{\copyright{}}
+  \gdefchar^^aa{\ordf}
+  \gdefchar^^ab{\guillemetleft{}}
+  \gdefchar^^ac{\ensuremath\lnot}
+  \gdefchar^^ad{\-}
+  \gdefchar^^ae{\registeredsymbol{}}
+  \gdefchar^^af{\={}}
+  %
+  \gdefchar^^b0{\textdegree}
+  \gdefchar^^b1{$\pm$}
+  \gdefchar^^b2{$^2$}
+  \gdefchar^^b3{$^3$}
+  \gdefchar^^b4{\'{}}
+  \gdefchar^^b5{$\mu$}
+  \gdefchar^^b6{\P}
+  \gdefchar^^b7{\ensuremath\cdot}
+  \gdefchar^^b8{\cedilla\ }
+  \gdefchar^^b9{$^1$}
+  \gdefchar^^ba{\ordm}
+  \gdefchar^^bb{\guillemetright{}}
+  \gdefchar^^bc{$1\over4$}
+  \gdefchar^^bd{$1\over2$}
+  \gdefchar^^be{$3\over4$}
+  \gdefchar^^bf{\questiondown}
+  %
+  \gdefchar^^c0{\`A}
+  \gdefchar^^c1{\'A}
+  \gdefchar^^c2{\^A}
+  \gdefchar^^c3{\~A}
+  \gdefchar^^c4{\"A}
+  \gdefchar^^c5{\ringaccent A}
+  \gdefchar^^c6{\AE}
+  \gdefchar^^c7{\cedilla C}
+  \gdefchar^^c8{\`E}
+  \gdefchar^^c9{\'E}
+  \gdefchar^^ca{\^E}
+  \gdefchar^^cb{\"E}
+  \gdefchar^^cc{\`I}
+  \gdefchar^^cd{\'I}
+  \gdefchar^^ce{\^I}
+  \gdefchar^^cf{\"I}
+  %
+  \gdefchar^^d0{\DH}
+  \gdefchar^^d1{\~N}
+  \gdefchar^^d2{\`O}
+  \gdefchar^^d3{\'O}
+  \gdefchar^^d4{\^O}
+  \gdefchar^^d5{\~O}
+  \gdefchar^^d6{\"O}
+  \gdefchar^^d7{$\times$}
+  \gdefchar^^d8{\O}
+  \gdefchar^^d9{\`U}
+  \gdefchar^^da{\'U}
+  \gdefchar^^db{\^U}
+  \gdefchar^^dc{\"U}
+  \gdefchar^^dd{\'Y}
+  \gdefchar^^de{\TH}
+  \gdefchar^^df{\ss}
+  %
+  \gdefchar^^e0{\`a}
+  \gdefchar^^e1{\'a}
+  \gdefchar^^e2{\^a}
+  \gdefchar^^e3{\~a}
+  \gdefchar^^e4{\"a}
+  \gdefchar^^e5{\ringaccent a}
+  \gdefchar^^e6{\ae}
+  \gdefchar^^e7{\cedilla c}
+  \gdefchar^^e8{\`e}
+  \gdefchar^^e9{\'e}
+  \gdefchar^^ea{\^e}
+  \gdefchar^^eb{\"e}
+  \gdefchar^^ec{\`{\dotless i}}
+  \gdefchar^^ed{\'{\dotless i}}
+  \gdefchar^^ee{\^{\dotless i}}
+  \gdefchar^^ef{\"{\dotless i}}
+  %
+  \gdefchar^^f0{\dh}
+  \gdefchar^^f1{\~n}
+  \gdefchar^^f2{\`o}
+  \gdefchar^^f3{\'o}
+  \gdefchar^^f4{\^o}
+  \gdefchar^^f5{\~o}
+  \gdefchar^^f6{\"o}
+  \gdefchar^^f7{$\div$}
+  \gdefchar^^f8{\o}
+  \gdefchar^^f9{\`u}
+  \gdefchar^^fa{\'u}
+  \gdefchar^^fb{\^u}
+  \gdefchar^^fc{\"u}
+  \gdefchar^^fd{\'y}
+  \gdefchar^^fe{\th}
+  \gdefchar^^ff{\"y}
+}
+
+% Latin9 (ISO-8859-15) encoding character definitions.
+% Installs the Latin1 table first, then redefines the eight positions
+% listed below.
+\def\latninechardefs{%
+  % Encoding is almost identical to Latin1.
+  \latonechardefs
+  %
+  \gdefchar^^a4{\euro{}}
+  \gdefchar^^a6{\v S}
+  \gdefchar^^a8{\v s}
+  \gdefchar^^b4{\v Z}
+  \gdefchar^^b8{\v z}
+  \gdefchar^^bc{\OE}
+  \gdefchar^^bd{\oe}
+  \gdefchar^^be{\"Y}
+}
+
+% Latin2 (ISO-8859-2) character definitions.
+% A complete table for ^^a0 through ^^ff. Position ^^a4 has no glyph
+% here and only logs a message through \missingcharmsg.
+\def\lattwochardefs{%
+  \gdefchar^^a0{\tie}
+  \gdefchar^^a1{\ogonek{A}}
+  \gdefchar^^a2{\u{}}
+  \gdefchar^^a3{\L}
+  \gdefchar^^a4{\missingcharmsg{CURRENCY SIGN}}
+  \gdefchar^^a5{\v L}
+  \gdefchar^^a6{\'S}
+  \gdefchar^^a7{\S}
+  \gdefchar^^a8{\"{}}
+  \gdefchar^^a9{\v S}
+  \gdefchar^^aa{\cedilla S}
+  \gdefchar^^ab{\v T}
+  \gdefchar^^ac{\'Z}
+  \gdefchar^^ad{\-}
+  \gdefchar^^ae{\v Z}
+  \gdefchar^^af{\dotaccent Z}
+  %
+  \gdefchar^^b0{\textdegree{}}
+  \gdefchar^^b1{\ogonek{a}}
+  \gdefchar^^b2{\ogonek{ }}
+  \gdefchar^^b3{\l}
+  \gdefchar^^b4{\'{}}
+  \gdefchar^^b5{\v l}
+  \gdefchar^^b6{\'s}
+  \gdefchar^^b7{\v{}}
+  \gdefchar^^b8{\cedilla\ }
+  \gdefchar^^b9{\v s}
+  \gdefchar^^ba{\cedilla s}
+  \gdefchar^^bb{\v t}
+  \gdefchar^^bc{\'z}
+  \gdefchar^^bd{\H{}}
+  \gdefchar^^be{\v z}
+  \gdefchar^^bf{\dotaccent z}
+  %
+  \gdefchar^^c0{\'R}
+  \gdefchar^^c1{\'A}
+  \gdefchar^^c2{\^A}
+  \gdefchar^^c3{\u A}
+  \gdefchar^^c4{\"A}
+  \gdefchar^^c5{\'L}
+  \gdefchar^^c6{\'C}
+  \gdefchar^^c7{\cedilla C}
+  \gdefchar^^c8{\v C}
+  \gdefchar^^c9{\'E}
+  \gdefchar^^ca{\ogonek{E}}
+  \gdefchar^^cb{\"E}
+  \gdefchar^^cc{\v E}
+  \gdefchar^^cd{\'I}
+  \gdefchar^^ce{\^I}
+  \gdefchar^^cf{\v D}
+  %
+  \gdefchar^^d0{\DH}
+  \gdefchar^^d1{\'N}
+  \gdefchar^^d2{\v N}
+  \gdefchar^^d3{\'O}
+  \gdefchar^^d4{\^O}
+  \gdefchar^^d5{\H O}
+  \gdefchar^^d6{\"O}
+  \gdefchar^^d7{$\times$}
+  \gdefchar^^d8{\v R}
+  \gdefchar^^d9{\ringaccent U}
+  \gdefchar^^da{\'U}
+  \gdefchar^^db{\H U}
+  \gdefchar^^dc{\"U}
+  \gdefchar^^dd{\'Y}
+  \gdefchar^^de{\cedilla T}
+  \gdefchar^^df{\ss}
+  %
+  \gdefchar^^e0{\'r}
+  \gdefchar^^e1{\'a}
+  \gdefchar^^e2{\^a}
+  \gdefchar^^e3{\u a}
+  \gdefchar^^e4{\"a}
+  \gdefchar^^e5{\'l}
+  \gdefchar^^e6{\'c}
+  \gdefchar^^e7{\cedilla c}
+  \gdefchar^^e8{\v c}
+  \gdefchar^^e9{\'e}
+  \gdefchar^^ea{\ogonek{e}}
+  \gdefchar^^eb{\"e}
+  \gdefchar^^ec{\v e}
+  \gdefchar^^ed{\'{\dotless{i}}}
+  \gdefchar^^ee{\^{\dotless{i}}}
+  \gdefchar^^ef{\v d}
+  %
+  \gdefchar^^f0{\dh}
+  \gdefchar^^f1{\'n}
+  \gdefchar^^f2{\v n}
+  \gdefchar^^f3{\'o}
+  \gdefchar^^f4{\^o}
+  \gdefchar^^f5{\H o}
+  \gdefchar^^f6{\"o}
+  \gdefchar^^f7{$\div$}
+  \gdefchar^^f8{\v r}
+  \gdefchar^^f9{\ringaccent u}
+  \gdefchar^^fa{\'u}
+  \gdefchar^^fb{\H u}
+  \gdefchar^^fc{\"u}
+  \gdefchar^^fd{\'y}
+  \gdefchar^^fe{\cedilla t}
+  \gdefchar^^ff{\dotaccent{}}
+}
+
+% UTF-8 character definitions.
+%
+% This code to support UTF-8 is based on LaTeX's utf8.def, with some
+% changes for Texinfo conventions. It is included here under the GPL by
+% permission from Frank Mittelbach and the LaTeX team.
+%
+% Scratch count registers for the UTF-8 code.
+\newcount\countUTFx
+\newcount\countUTFy
+\newcount\countUTFz
+
+% \UTFviiiTwoOctets, \UTFviiiThreeOctets and \UTFviiiFourOctets take the
+% bytes of one UTF-8 sequence as separate arguments (#1 is the first
+% byte). They form the control sequence name "u8:" followed by the
+% bytes, applying \string to every byte after the first, and pass the
+% resulting control sequence to \UTFviiiDefined.
+%
+\gdef\UTFviiiTwoOctets#1#2{\expandafter
+  \UTFviiiDefined\csname u8:#1\string #2\endcsname}
+%
+\gdef\UTFviiiThreeOctets#1#2#3{\expandafter
+  \UTFviiiDefined\csname u8:#1\string #2\string #3\endcsname}
+%
+\gdef\UTFviiiFourOctets#1#2#3#4{\expandafter
+  \UTFviiiDefined\csname u8:#1\string #2\string #3\string #4\endcsname}
+
+% #1 is a \u8:... control sequence. If it compares equal to \relax
+% (which is what \csname yields for a name that had no definition) a
+% message is logged; otherwise #1 is executed.
+%
+\gdef\UTFviiiDefined#1{%
+  \ifx #1\relax
+    \message{\linenumber Unicode char \string #1 not defined for Texinfo}%
+  \else
+    \expandafter #1%
+  \fi
+}
+
+% Give non-ASCII bytes the active definitions for processing UTF-8 sequences
+\begingroup
+  % Inside this group ~ is active and $ and " are "other" characters.
+  \catcode`\~13
+  \catcode`\$12
+  \catcode`\"12
+
+  % Loop from \countUTFx to \countUTFy, performing \UTFviiiTmp
+  % substituting ~ and $ with a character token of that value.
+  % The upper bound \countUTFy is exclusive. The substitution is done by
+  % setting the \uccode of ~ and $ to the current byte and running the
+  % expansion of \UTFviiiTmp through \uppercase.
+  \def\UTFviiiLoop{%
+    \global\catcode\countUTFx\active
+    \uccode`\~\countUTFx
+    \uccode`\$\countUTFx
+    \uppercase\expandafter{\UTFviiiTmp}%
+    \advance\countUTFx by 1
+    \ifnum\countUTFx < \countUTFy
+      \expandafter\UTFviiiLoop
+    \fi}
+
+  % For bytes other than the first in a UTF-8 sequence. Not expected to
+  % be expanded except when writing to auxiliary files.
+  % (Bytes "80 through "C1.)
+  \countUTFx = "80
+  \countUTFy = "C2
+  \def\UTFviiiTmp{%
+    \gdef~{%
+        \ifpassthroughchars $\fi}}%
+  \UTFviiiLoop
+
+  % Bytes "C2 through "DF: first byte of a two-octet sequence.
+  \countUTFx = "C2
+  \countUTFy = "E0
+  \def\UTFviiiTmp{%
+    \gdef~{%
+        \ifpassthroughchars $%
+        \else\expandafter\UTFviiiTwoOctets\expandafter$\fi}}%
+  \UTFviiiLoop
+
+  % Bytes "E0 through "EF: first byte of a three-octet sequence.
+  \countUTFx = "E0
+  \countUTFy = "F0
+  \def\UTFviiiTmp{%
+    \gdef~{%
+        \ifpassthroughchars $%
+        \else\expandafter\UTFviiiThreeOctets\expandafter$\fi}}%
+  \UTFviiiLoop
+
+  % Bytes "F0 through "F3: first byte of a four-octet sequence.
+  \countUTFx = "F0
+  \countUTFy = "F4
+  \def\UTFviiiTmp{%
+    \gdef~{%
+        \ifpassthroughchars $%
+        \else\expandafter\UTFviiiFourOctets\expandafter$\fi
+        }}%
+  \UTFviiiLoop
+\endgroup
+
+\def\globallet{\global\let} % save some \expandafter's below
+
+% @U{xxxx} to produce U+xxxx, if we support it.
+% #1 is the code point in hexadecimal. If \uni:#1 is defined (the name
+% is looked up exactly as #1 is spelled) that definition is used.
+% Otherwise, on a natively Unicode-capable engine the character itself
+% is produced through \uppercase, and on other engines an error is
+% raised.
+\def\U#1{%
+  \expandafter\ifx\csname uni:#1\endcsname \relax
+    \iftxinativeunicodecapable
+      % All Unicode characters can be used if native Unicode handling is
+      % active. However, if the font does not have the glyph,
+      % letters are missing.
+      \begingroup
+        \uccode`\.="#1\relax
+        % The % after \uppercase{.} is needed: without it the end of the
+        % line puts a space token into this definition, and a spurious
+        % space would follow the character in the output.
+        \uppercase{.}%
+      \endgroup
+    \else
+      \errhelp = \EMsimple
+      \errmessage{Unicode character U+#1 not supported, sorry}%
+    \fi
+  \else
+    \csname uni:#1\endcsname
+  \fi
+}
+
+% These macros are used here to construct the name of a control
+% sequence to be defined.
+% The arguments are the bytes of one UTF-8 sequence; \string is applied
+% to every byte after the first.
+\def\UTFviiiTwoOctetsName#1#2{%
+  \csname u8:#1\string #2\endcsname}%
+\def\UTFviiiThreeOctetsName#1#2#3{%
+  \csname u8:#1\string #2\string #3\endcsname}%
+\def\UTFviiiFourOctetsName#1#2#3#4{%
+  \csname u8:#1\string #2\string #3\string #4\endcsname}%
+
+% For UTF-8 byte sequences (TeX, e-TeX and pdfTeX),
+% provide a definition macro to replace a Unicode character;
+% this gets used by the @U command
+%
+% \DeclareUnicodeCharacterUTFviii{HEX}{DEFINITION}: #1 is the code point
+% in hexadecimal, #2 the replacement text. It globally defines both the
+% \u8:<bytes> control sequence for the UTF-8 encoding of #1 and \uni:#1.
+%
+\begingroup
+  % The characters . , ; and ! are used below as placeholders whose
+  % \uccode is set to the bytes of the sequence being built.
+  \catcode`\"=12
+  \catcode`\<=12
+  \catcode`\.=12
+  \catcode`\,=12
+  \catcode`\;=12
+  \catcode`\!=12
+  \catcode`\~=13
+  \gdef\DeclareUnicodeCharacterUTFviii#1#2{%
+    \countUTFz = "#1\relax
+    \begingroup
+      \parseXMLCharref
+
+      % Give \u8:... its definition. The sequence of seven \expandafter's
+      % expands after the \gdef three times, e.g.
+      %
+      % 1. \UTFviiiTwoOctetsName B1 B2
+      % 2. \csname u8:B1 \string B2 \endcsname
+      % 3. \u8: B1 B2 (a single control sequence token)
+      %
+      \expandafter\expandafter
+      \expandafter\expandafter
+      \expandafter\expandafter
+      \expandafter\gdef \UTFviiiTmp{#2}%
+      %
+      % Log a message if this code point had already been declared.
+      \expandafter\ifx\csname uni:#1\endcsname \relax \else
+        \message{Internal error, already defined: #1}%
+      \fi
+      %
+      % define an additional control sequence for this code point.
+      \expandafter\globallet\csname uni:#1\endcsname \UTFviiiTmp
+    \endgroup}
+  %
+  % Given the value in \countUTFz as a Unicode code point, set \UTFviiiTmp
+  % to the corresponding UTF-8 sequence.
+  % Code points below "A0 are rejected with an error; values below "800
+  % get two bytes, values below "10000 three bytes, all others four.
+  \gdef\parseXMLCharref{%
+    \ifnum\countUTFz < "A0\relax
+      \errhelp = \EMsimple
+      \errmessage{Cannot define Unicode char value < 00A0}%
+    \else\ifnum\countUTFz < "800\relax
+      \parseUTFviiiA,%
+      \parseUTFviiiB C\UTFviiiTwoOctetsName.,%
+    \else\ifnum\countUTFz < "10000\relax
+      \parseUTFviiiA;%
+      \parseUTFviiiA,%
+      \parseUTFviiiB E\UTFviiiThreeOctetsName.{,;}%
+    \else
+      \parseUTFviiiA;%
+      \parseUTFviiiA,%
+      \parseUTFviiiA!%
+      \parseUTFviiiB F\UTFviiiFourOctetsName.{!,;}%
+    \fi\fi\fi
+  }
+
+  % Extract a byte from the end of the UTF-8 representation of \countUTFz.
+  % It must be a non-initial byte in the sequence.
+  % Change \uccode of #1 for it to be used in \parseUTFviiiB as one
+  % of the bytes.
+  % On return \countUTFz holds its former value divided by 64;
+  % \countUTFx and \countUTFy are used as scratch registers.
+  \gdef\parseUTFviiiA#1{%
+    \countUTFx = \countUTFz
+    \divide\countUTFz by 64
+    \countUTFy = \countUTFz % Save to be the future value of \countUTFz.
+    \multiply\countUTFz by 64
+
+    % \countUTFz is now \countUTFx with the last 6 bits cleared. Subtract
+    % in order to get the last six bits.
+    \advance\countUTFx by -\countUTFz
+
+    % Convert this to the byte in the UTF-8 sequence.
+    \advance\countUTFx by 128
+    \uccode `#1\countUTFx
+    \countUTFz = \countUTFy}
+
+  % Used to put a UTF-8 byte sequence into \UTFviiiTmp
+  % #1 is the high hexadecimal digit (C, E or F) of the increment "#10
+  % added to \countUTFz to yield the first byte of the UTF-8 sequence.
+  % #2 is one of the \UTFviii*OctetsName macros.
+  % #3 is always a full stop (.)
+  % #4 is a template for the other bytes in the sequence. The values for these
+  % bytes are substituted in here with \uppercase using the \uccode's.
+  \gdef\parseUTFviiiB#1#2#3#4{%
+    \advance\countUTFz by "#10\relax
+    \uccode `#3\countUTFz
+    \uppercase{\gdef\UTFviiiTmp{#2#3#4}}}
+\endgroup
+
+% For native Unicode handling (XeTeX and LuaTeX),
+% provide a definition macro that sets a catcode to `other' non-globally
+% (#1 is the code point in hexadecimal; #2 is accepted but not used.)
+%
+\def\DeclareUnicodeCharacterNativeOther#1#2{%
+  \catcode"#1=\other
+}
+
+% https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_M
+% U+0000..U+007F = https://en.wikipedia.org/wiki/Basic_Latin_(Unicode_block)
+% U+0080..U+00FF = https://en.wikipedia.org/wiki/Latin-1_Supplement_(Unicode_block)
+% U+0100..U+017F = https://en.wikipedia.org/wiki/Latin_Extended-A
+% U+0180..U+024F = https://en.wikipedia.org/wiki/Latin_Extended-B
+%
+% Many of our renditions are less than wonderful, and all the missing
+% characters are available somewhere. Loading the necessary fonts
+% awaits user request. We can't truly support Unicode without
+% reimplementing everything that's been done in LaTeX for many years,
+% plus probably using luatex or xetex, and who knows what else.
+% We won't be doing that here in this simple file. But we can try to at
+% least make most of the characters not bomb out.
+% +\def\unicodechardefs{% + \DeclareUnicodeCharacter{00A0}{\tie}% + \DeclareUnicodeCharacter{00A1}{\exclamdown}% + \DeclareUnicodeCharacter{00A2}{{\tcfont \char162}}% 0242=cent + \DeclareUnicodeCharacter{00A3}{\pounds{}}% + \DeclareUnicodeCharacter{00A4}{{\tcfont \char164}}% 0244=currency + \DeclareUnicodeCharacter{00A5}{{\tcfont \char165}}% 0245=yen + \DeclareUnicodeCharacter{00A6}{{\tcfont \char166}}% 0246=brokenbar + \DeclareUnicodeCharacter{00A7}{\S}% + \DeclareUnicodeCharacter{00A8}{\"{ }}% + \DeclareUnicodeCharacter{00A9}{\copyright{}}% + \DeclareUnicodeCharacter{00AA}{\ordf}% + \DeclareUnicodeCharacter{00AB}{\guillemetleft{}}% + \DeclareUnicodeCharacter{00AC}{\ensuremath\lnot}% + \DeclareUnicodeCharacter{00AD}{\-}% + \DeclareUnicodeCharacter{00AE}{\registeredsymbol{}}% + \DeclareUnicodeCharacter{00AF}{\={ }}% + % + \DeclareUnicodeCharacter{00B0}{\ringaccent{ }}% + \DeclareUnicodeCharacter{00B1}{\ensuremath\pm}% + \DeclareUnicodeCharacter{00B2}{$^2$}% + \DeclareUnicodeCharacter{00B3}{$^3$}% + \DeclareUnicodeCharacter{00B4}{\'{ }}% + \DeclareUnicodeCharacter{00B5}{$\mu$}% + \DeclareUnicodeCharacter{00B6}{\P}% + \DeclareUnicodeCharacter{00B7}{\ensuremath\cdot}% + \DeclareUnicodeCharacter{00B8}{\cedilla{ }}% + \DeclareUnicodeCharacter{00B9}{$^1$}% + \DeclareUnicodeCharacter{00BA}{\ordm}% + \DeclareUnicodeCharacter{00BB}{\guillemetright{}}% + \DeclareUnicodeCharacter{00BC}{$1\over4$}% + \DeclareUnicodeCharacter{00BD}{$1\over2$}% + \DeclareUnicodeCharacter{00BE}{$3\over4$}% + \DeclareUnicodeCharacter{00BF}{\questiondown}% + % + \DeclareUnicodeCharacter{00C0}{\`A}% + \DeclareUnicodeCharacter{00C1}{\'A}% + \DeclareUnicodeCharacter{00C2}{\^A}% + \DeclareUnicodeCharacter{00C3}{\~A}% + \DeclareUnicodeCharacter{00C4}{\"A}% + \DeclareUnicodeCharacter{00C5}{\AA}% + \DeclareUnicodeCharacter{00C6}{\AE}% + \DeclareUnicodeCharacter{00C7}{\cedilla{C}}% + \DeclareUnicodeCharacter{00C8}{\`E}% + \DeclareUnicodeCharacter{00C9}{\'E}% + \DeclareUnicodeCharacter{00CA}{\^E}% + 
\DeclareUnicodeCharacter{00CB}{\"E}% + \DeclareUnicodeCharacter{00CC}{\`I}% + \DeclareUnicodeCharacter{00CD}{\'I}% + \DeclareUnicodeCharacter{00CE}{\^I}% + \DeclareUnicodeCharacter{00CF}{\"I}% + % + \DeclareUnicodeCharacter{00D0}{\DH}% + \DeclareUnicodeCharacter{00D1}{\~N}% + \DeclareUnicodeCharacter{00D2}{\`O}% + \DeclareUnicodeCharacter{00D3}{\'O}% + \DeclareUnicodeCharacter{00D4}{\^O}% + \DeclareUnicodeCharacter{00D5}{\~O}% + \DeclareUnicodeCharacter{00D6}{\"O}% + \DeclareUnicodeCharacter{00D7}{\ensuremath\times}% + \DeclareUnicodeCharacter{00D8}{\O}% + \DeclareUnicodeCharacter{00D9}{\`U}% + \DeclareUnicodeCharacter{00DA}{\'U}% + \DeclareUnicodeCharacter{00DB}{\^U}% + \DeclareUnicodeCharacter{00DC}{\"U}% + \DeclareUnicodeCharacter{00DD}{\'Y}% + \DeclareUnicodeCharacter{00DE}{\TH}% + \DeclareUnicodeCharacter{00DF}{\ss}% + % + \DeclareUnicodeCharacter{00E0}{\`a}% + \DeclareUnicodeCharacter{00E1}{\'a}% + \DeclareUnicodeCharacter{00E2}{\^a}% + \DeclareUnicodeCharacter{00E3}{\~a}% + \DeclareUnicodeCharacter{00E4}{\"a}% + \DeclareUnicodeCharacter{00E5}{\aa}% + \DeclareUnicodeCharacter{00E6}{\ae}% + \DeclareUnicodeCharacter{00E7}{\cedilla{c}}% + \DeclareUnicodeCharacter{00E8}{\`e}% + \DeclareUnicodeCharacter{00E9}{\'e}% + \DeclareUnicodeCharacter{00EA}{\^e}% + \DeclareUnicodeCharacter{00EB}{\"e}% + \DeclareUnicodeCharacter{00EC}{\`{\dotless{i}}}% + \DeclareUnicodeCharacter{00ED}{\'{\dotless{i}}}% + \DeclareUnicodeCharacter{00EE}{\^{\dotless{i}}}% + \DeclareUnicodeCharacter{00EF}{\"{\dotless{i}}}% + % + \DeclareUnicodeCharacter{00F0}{\dh}% + \DeclareUnicodeCharacter{00F1}{\~n}% + \DeclareUnicodeCharacter{00F2}{\`o}% + \DeclareUnicodeCharacter{00F3}{\'o}% + \DeclareUnicodeCharacter{00F4}{\^o}% + \DeclareUnicodeCharacter{00F5}{\~o}% + \DeclareUnicodeCharacter{00F6}{\"o}% + \DeclareUnicodeCharacter{00F7}{\ensuremath\div}% + \DeclareUnicodeCharacter{00F8}{\o}% + \DeclareUnicodeCharacter{00F9}{\`u}% + \DeclareUnicodeCharacter{00FA}{\'u}% + 
\DeclareUnicodeCharacter{00FB}{\^u}% + \DeclareUnicodeCharacter{00FC}{\"u}% + \DeclareUnicodeCharacter{00FD}{\'y}% + \DeclareUnicodeCharacter{00FE}{\th}% + \DeclareUnicodeCharacter{00FF}{\"y}% + % + \DeclareUnicodeCharacter{0100}{\=A}% + \DeclareUnicodeCharacter{0101}{\=a}% + \DeclareUnicodeCharacter{0102}{\u{A}}% + \DeclareUnicodeCharacter{0103}{\u{a}}% + \DeclareUnicodeCharacter{0104}{\ogonek{A}}% + \DeclareUnicodeCharacter{0105}{\ogonek{a}}% + \DeclareUnicodeCharacter{0106}{\'C}% + \DeclareUnicodeCharacter{0107}{\'c}% + \DeclareUnicodeCharacter{0108}{\^C}% + \DeclareUnicodeCharacter{0109}{\^c}% + \DeclareUnicodeCharacter{010A}{\dotaccent{C}}% + \DeclareUnicodeCharacter{010B}{\dotaccent{c}}% + \DeclareUnicodeCharacter{010C}{\v{C}}% + \DeclareUnicodeCharacter{010D}{\v{c}}% + \DeclareUnicodeCharacter{010E}{\v{D}}% + \DeclareUnicodeCharacter{010F}{d'}% + % + \DeclareUnicodeCharacter{0110}{\DH}% + \DeclareUnicodeCharacter{0111}{\dh}% + \DeclareUnicodeCharacter{0112}{\=E}% + \DeclareUnicodeCharacter{0113}{\=e}% + \DeclareUnicodeCharacter{0114}{\u{E}}% + \DeclareUnicodeCharacter{0115}{\u{e}}% + \DeclareUnicodeCharacter{0116}{\dotaccent{E}}% + \DeclareUnicodeCharacter{0117}{\dotaccent{e}}% + \DeclareUnicodeCharacter{0118}{\ogonek{E}}% + \DeclareUnicodeCharacter{0119}{\ogonek{e}}% + \DeclareUnicodeCharacter{011A}{\v{E}}% + \DeclareUnicodeCharacter{011B}{\v{e}}% + \DeclareUnicodeCharacter{011C}{\^G}% + \DeclareUnicodeCharacter{011D}{\^g}% + \DeclareUnicodeCharacter{011E}{\u{G}}% + \DeclareUnicodeCharacter{011F}{\u{g}}% + % + \DeclareUnicodeCharacter{0120}{\dotaccent{G}}% + \DeclareUnicodeCharacter{0121}{\dotaccent{g}}% + \DeclareUnicodeCharacter{0122}{\cedilla{G}}% + \DeclareUnicodeCharacter{0123}{\cedilla{g}}% + \DeclareUnicodeCharacter{0124}{\^H}% + \DeclareUnicodeCharacter{0125}{\^h}% + \DeclareUnicodeCharacter{0126}{\missingcharmsg{H WITH STROKE}}% + \DeclareUnicodeCharacter{0127}{\missingcharmsg{h WITH STROKE}}% + \DeclareUnicodeCharacter{0128}{\~I}% + 
\DeclareUnicodeCharacter{0129}{\~{\dotless{i}}}% + \DeclareUnicodeCharacter{012A}{\=I}% + \DeclareUnicodeCharacter{012B}{\={\dotless{i}}}% + \DeclareUnicodeCharacter{012C}{\u{I}}% + \DeclareUnicodeCharacter{012D}{\u{\dotless{i}}}% + \DeclareUnicodeCharacter{012E}{\ogonek{I}}% + \DeclareUnicodeCharacter{012F}{\ogonek{i}}% + % + \DeclareUnicodeCharacter{0130}{\dotaccent{I}}% + \DeclareUnicodeCharacter{0131}{\dotless{i}}% + \DeclareUnicodeCharacter{0132}{IJ}% + \DeclareUnicodeCharacter{0133}{ij}% + \DeclareUnicodeCharacter{0134}{\^J}% + \DeclareUnicodeCharacter{0135}{\^{\dotless{j}}}% + \DeclareUnicodeCharacter{0136}{\cedilla{K}}% + \DeclareUnicodeCharacter{0137}{\cedilla{k}}% + \DeclareUnicodeCharacter{0138}{\ensuremath\kappa}% + \DeclareUnicodeCharacter{0139}{\'L}% + \DeclareUnicodeCharacter{013A}{\'l}% + \DeclareUnicodeCharacter{013B}{\cedilla{L}}% + \DeclareUnicodeCharacter{013C}{\cedilla{l}}% + \DeclareUnicodeCharacter{013D}{L'}% should kern + \DeclareUnicodeCharacter{013E}{l'}% should kern + \DeclareUnicodeCharacter{013F}{L\U{00B7}}% + % + \DeclareUnicodeCharacter{0140}{l\U{00B7}}% + \DeclareUnicodeCharacter{0141}{\L}% + \DeclareUnicodeCharacter{0142}{\l}% + \DeclareUnicodeCharacter{0143}{\'N}% + \DeclareUnicodeCharacter{0144}{\'n}% + \DeclareUnicodeCharacter{0145}{\cedilla{N}}% + \DeclareUnicodeCharacter{0146}{\cedilla{n}}% + \DeclareUnicodeCharacter{0147}{\v{N}}% + \DeclareUnicodeCharacter{0148}{\v{n}}% + \DeclareUnicodeCharacter{0149}{'n}% + \DeclareUnicodeCharacter{014A}{\missingcharmsg{ENG}}% + \DeclareUnicodeCharacter{014B}{\missingcharmsg{eng}}% + \DeclareUnicodeCharacter{014C}{\=O}% + \DeclareUnicodeCharacter{014D}{\=o}% + \DeclareUnicodeCharacter{014E}{\u{O}}% + \DeclareUnicodeCharacter{014F}{\u{o}}% + % + \DeclareUnicodeCharacter{0150}{\H{O}}% + \DeclareUnicodeCharacter{0151}{\H{o}}% + \DeclareUnicodeCharacter{0152}{\OE}% + \DeclareUnicodeCharacter{0153}{\oe}% + \DeclareUnicodeCharacter{0154}{\'R}% + \DeclareUnicodeCharacter{0155}{\'r}% + 
\DeclareUnicodeCharacter{0156}{\cedilla{R}}% + \DeclareUnicodeCharacter{0157}{\cedilla{r}}% + \DeclareUnicodeCharacter{0158}{\v{R}}% + \DeclareUnicodeCharacter{0159}{\v{r}}% + \DeclareUnicodeCharacter{015A}{\'S}% + \DeclareUnicodeCharacter{015B}{\'s}% + \DeclareUnicodeCharacter{015C}{\^S}% + \DeclareUnicodeCharacter{015D}{\^s}% + \DeclareUnicodeCharacter{015E}{\cedilla{S}}% + \DeclareUnicodeCharacter{015F}{\cedilla{s}}% + % + \DeclareUnicodeCharacter{0160}{\v{S}}% + \DeclareUnicodeCharacter{0161}{\v{s}}% + \DeclareUnicodeCharacter{0162}{\cedilla{T}}% + \DeclareUnicodeCharacter{0163}{\cedilla{t}}% + \DeclareUnicodeCharacter{0164}{\v{T}}% + \DeclareUnicodeCharacter{0165}{\v{t}}% + \DeclareUnicodeCharacter{0166}{\missingcharmsg{H WITH STROKE}}% + \DeclareUnicodeCharacter{0167}{\missingcharmsg{h WITH STROKE}}% + \DeclareUnicodeCharacter{0168}{\~U}% + \DeclareUnicodeCharacter{0169}{\~u}% + \DeclareUnicodeCharacter{016A}{\=U}% + \DeclareUnicodeCharacter{016B}{\=u}% + \DeclareUnicodeCharacter{016C}{\u{U}}% + \DeclareUnicodeCharacter{016D}{\u{u}}% + \DeclareUnicodeCharacter{016E}{\ringaccent{U}}% + \DeclareUnicodeCharacter{016F}{\ringaccent{u}}% + % + \DeclareUnicodeCharacter{0170}{\H{U}}% + \DeclareUnicodeCharacter{0171}{\H{u}}% + \DeclareUnicodeCharacter{0172}{\ogonek{U}}% + \DeclareUnicodeCharacter{0173}{\ogonek{u}}% + \DeclareUnicodeCharacter{0174}{\^W}% + \DeclareUnicodeCharacter{0175}{\^w}% + \DeclareUnicodeCharacter{0176}{\^Y}% + \DeclareUnicodeCharacter{0177}{\^y}% + \DeclareUnicodeCharacter{0178}{\"Y}% + \DeclareUnicodeCharacter{0179}{\'Z}% + \DeclareUnicodeCharacter{017A}{\'z}% + \DeclareUnicodeCharacter{017B}{\dotaccent{Z}}% + \DeclareUnicodeCharacter{017C}{\dotaccent{z}}% + \DeclareUnicodeCharacter{017D}{\v{Z}}% + \DeclareUnicodeCharacter{017E}{\v{z}}% + \DeclareUnicodeCharacter{017F}{\missingcharmsg{LONG S}}% + % + \DeclareUnicodeCharacter{01C4}{D\v{Z}}% + \DeclareUnicodeCharacter{01C5}{D\v{z}}% + \DeclareUnicodeCharacter{01C6}{d\v{z}}% + 
\DeclareUnicodeCharacter{01C7}{LJ}% + \DeclareUnicodeCharacter{01C8}{Lj}% + \DeclareUnicodeCharacter{01C9}{lj}% + \DeclareUnicodeCharacter{01CA}{NJ}% + \DeclareUnicodeCharacter{01CB}{Nj}% + \DeclareUnicodeCharacter{01CC}{nj}% + \DeclareUnicodeCharacter{01CD}{\v{A}}% + \DeclareUnicodeCharacter{01CE}{\v{a}}% + \DeclareUnicodeCharacter{01CF}{\v{I}}% + % + \DeclareUnicodeCharacter{01D0}{\v{\dotless{i}}}% + \DeclareUnicodeCharacter{01D1}{\v{O}}% + \DeclareUnicodeCharacter{01D2}{\v{o}}% + \DeclareUnicodeCharacter{01D3}{\v{U}}% + \DeclareUnicodeCharacter{01D4}{\v{u}}% + % + \DeclareUnicodeCharacter{01E2}{\={\AE}}% + \DeclareUnicodeCharacter{01E3}{\={\ae}}% + \DeclareUnicodeCharacter{01E6}{\v{G}}% + \DeclareUnicodeCharacter{01E7}{\v{g}}% + \DeclareUnicodeCharacter{01E8}{\v{K}}% + \DeclareUnicodeCharacter{01E9}{\v{k}}% + % + \DeclareUnicodeCharacter{01F0}{\v{\dotless{j}}}% + \DeclareUnicodeCharacter{01F1}{DZ}% + \DeclareUnicodeCharacter{01F2}{Dz}% + \DeclareUnicodeCharacter{01F3}{dz}% + \DeclareUnicodeCharacter{01F4}{\'G}% + \DeclareUnicodeCharacter{01F5}{\'g}% + \DeclareUnicodeCharacter{01F8}{\`N}% + \DeclareUnicodeCharacter{01F9}{\`n}% + \DeclareUnicodeCharacter{01FC}{\'{\AE}}% + \DeclareUnicodeCharacter{01FD}{\'{\ae}}% + \DeclareUnicodeCharacter{01FE}{\'{\O}}% + \DeclareUnicodeCharacter{01FF}{\'{\o}}% + % + \DeclareUnicodeCharacter{021E}{\v{H}}% + \DeclareUnicodeCharacter{021F}{\v{h}}% + % + \DeclareUnicodeCharacter{0226}{\dotaccent{A}}% + \DeclareUnicodeCharacter{0227}{\dotaccent{a}}% + \DeclareUnicodeCharacter{0228}{\cedilla{E}}% + \DeclareUnicodeCharacter{0229}{\cedilla{e}}% + \DeclareUnicodeCharacter{022E}{\dotaccent{O}}% + \DeclareUnicodeCharacter{022F}{\dotaccent{o}}% + % + \DeclareUnicodeCharacter{0232}{\=Y}% + \DeclareUnicodeCharacter{0233}{\=y}% + \DeclareUnicodeCharacter{0237}{\dotless{j}}% + % + \DeclareUnicodeCharacter{02DB}{\ogonek{ }}% + % + % Greek letters upper case + \DeclareUnicodeCharacter{0391}{{\it A}}% + \DeclareUnicodeCharacter{0392}{{\it B}}% + 
\DeclareUnicodeCharacter{0393}{\ensuremath{\mit\Gamma}}% + \DeclareUnicodeCharacter{0394}{\ensuremath{\mit\Delta}}% + \DeclareUnicodeCharacter{0395}{{\it E}}% + \DeclareUnicodeCharacter{0396}{{\it Z}}% + \DeclareUnicodeCharacter{0397}{{\it H}}% + \DeclareUnicodeCharacter{0398}{\ensuremath{\mit\Theta}}% + \DeclareUnicodeCharacter{0399}{{\it I}}% + \DeclareUnicodeCharacter{039A}{{\it K}}% + \DeclareUnicodeCharacter{039B}{\ensuremath{\mit\Lambda}}% + \DeclareUnicodeCharacter{039C}{{\it M}}% + \DeclareUnicodeCharacter{039D}{{\it N}}% + \DeclareUnicodeCharacter{039E}{\ensuremath{\mit\Xi}}% + \DeclareUnicodeCharacter{039F}{{\it O}}% + \DeclareUnicodeCharacter{03A0}{\ensuremath{\mit\Pi}}% + \DeclareUnicodeCharacter{03A1}{{\it P}}% + %\DeclareUnicodeCharacter{03A2}{} % none - corresponds to final sigma + \DeclareUnicodeCharacter{03A3}{\ensuremath{\mit\Sigma}}% + \DeclareUnicodeCharacter{03A4}{{\it T}}% + \DeclareUnicodeCharacter{03A5}{\ensuremath{\mit\Upsilon}}% + \DeclareUnicodeCharacter{03A6}{\ensuremath{\mit\Phi}}% + \DeclareUnicodeCharacter{03A7}{{\it X}}% + \DeclareUnicodeCharacter{03A8}{\ensuremath{\mit\Psi}}% + \DeclareUnicodeCharacter{03A9}{\ensuremath{\mit\Omega}}% + % + % Vowels with accents + \DeclareUnicodeCharacter{0390}{\ensuremath{\ddot{\acute\iota}}}% + \DeclareUnicodeCharacter{03AC}{\ensuremath{\acute\alpha}}% + \DeclareUnicodeCharacter{03AD}{\ensuremath{\acute\epsilon}}% + \DeclareUnicodeCharacter{03AE}{\ensuremath{\acute\eta}}% + \DeclareUnicodeCharacter{03AF}{\ensuremath{\acute\iota}}% + \DeclareUnicodeCharacter{03B0}{\ensuremath{\acute{\ddot\upsilon}}}% + % + % Standalone accent + \DeclareUnicodeCharacter{0384}{\ensuremath{\acute{\ }}}% + % + % Greek letters lower case + \DeclareUnicodeCharacter{03B1}{\ensuremath\alpha}% + \DeclareUnicodeCharacter{03B2}{\ensuremath\beta}% + \DeclareUnicodeCharacter{03B3}{\ensuremath\gamma}% + \DeclareUnicodeCharacter{03B4}{\ensuremath\delta}% + \DeclareUnicodeCharacter{03B5}{\ensuremath\epsilon}% + 
\DeclareUnicodeCharacter{03B6}{\ensuremath\zeta}% + \DeclareUnicodeCharacter{03B7}{\ensuremath\eta}% + \DeclareUnicodeCharacter{03B8}{\ensuremath\theta}% + \DeclareUnicodeCharacter{03B9}{\ensuremath\iota}% + \DeclareUnicodeCharacter{03BA}{\ensuremath\kappa}% + \DeclareUnicodeCharacter{03BB}{\ensuremath\lambda}% + \DeclareUnicodeCharacter{03BC}{\ensuremath\mu}% + \DeclareUnicodeCharacter{03BD}{\ensuremath\nu}% + \DeclareUnicodeCharacter{03BE}{\ensuremath\xi}% + \DeclareUnicodeCharacter{03BF}{{\it o}}% omicron + \DeclareUnicodeCharacter{03C0}{\ensuremath\pi}% + \DeclareUnicodeCharacter{03C1}{\ensuremath\rho}% + \DeclareUnicodeCharacter{03C2}{\ensuremath\varsigma}% + \DeclareUnicodeCharacter{03C3}{\ensuremath\sigma}% + \DeclareUnicodeCharacter{03C4}{\ensuremath\tau}% + \DeclareUnicodeCharacter{03C5}{\ensuremath\upsilon}% + \DeclareUnicodeCharacter{03C6}{\ensuremath\phi}% + \DeclareUnicodeCharacter{03C7}{\ensuremath\chi}% + \DeclareUnicodeCharacter{03C8}{\ensuremath\psi}% + \DeclareUnicodeCharacter{03C9}{\ensuremath\omega}% + % + % More Greek vowels with accents + \DeclareUnicodeCharacter{03CA}{\ensuremath{\ddot\iota}}% + \DeclareUnicodeCharacter{03CB}{\ensuremath{\ddot\upsilon}}% + \DeclareUnicodeCharacter{03CC}{\ensuremath{\acute o}}% + \DeclareUnicodeCharacter{03CD}{\ensuremath{\acute\upsilon}}% + \DeclareUnicodeCharacter{03CE}{\ensuremath{\acute\omega}}% + % + % Variant Greek letters + \DeclareUnicodeCharacter{03D1}{\ensuremath\vartheta}% + \DeclareUnicodeCharacter{03D6}{\ensuremath\varpi}% + \DeclareUnicodeCharacter{03F1}{\ensuremath\varrho}% + % + \DeclareUnicodeCharacter{1E02}{\dotaccent{B}}% + \DeclareUnicodeCharacter{1E03}{\dotaccent{b}}% + \DeclareUnicodeCharacter{1E04}{\udotaccent{B}}% + \DeclareUnicodeCharacter{1E05}{\udotaccent{b}}% + \DeclareUnicodeCharacter{1E06}{\ubaraccent{B}}% + \DeclareUnicodeCharacter{1E07}{\ubaraccent{b}}% + \DeclareUnicodeCharacter{1E0A}{\dotaccent{D}}% + \DeclareUnicodeCharacter{1E0B}{\dotaccent{d}}% + 
\DeclareUnicodeCharacter{1E0C}{\udotaccent{D}}% + \DeclareUnicodeCharacter{1E0D}{\udotaccent{d}}% + \DeclareUnicodeCharacter{1E0E}{\ubaraccent{D}}% + \DeclareUnicodeCharacter{1E0F}{\ubaraccent{d}}% + % + \DeclareUnicodeCharacter{1E1E}{\dotaccent{F}}% + \DeclareUnicodeCharacter{1E1F}{\dotaccent{f}}% + % + \DeclareUnicodeCharacter{1E20}{\=G}% + \DeclareUnicodeCharacter{1E21}{\=g}% + \DeclareUnicodeCharacter{1E22}{\dotaccent{H}}% + \DeclareUnicodeCharacter{1E23}{\dotaccent{h}}% + \DeclareUnicodeCharacter{1E24}{\udotaccent{H}}% + \DeclareUnicodeCharacter{1E25}{\udotaccent{h}}% + \DeclareUnicodeCharacter{1E26}{\"H}% + \DeclareUnicodeCharacter{1E27}{\"h}% + % + \DeclareUnicodeCharacter{1E30}{\'K}% + \DeclareUnicodeCharacter{1E31}{\'k}% + \DeclareUnicodeCharacter{1E32}{\udotaccent{K}}% + \DeclareUnicodeCharacter{1E33}{\udotaccent{k}}% + \DeclareUnicodeCharacter{1E34}{\ubaraccent{K}}% + \DeclareUnicodeCharacter{1E35}{\ubaraccent{k}}% + \DeclareUnicodeCharacter{1E36}{\udotaccent{L}}% + \DeclareUnicodeCharacter{1E37}{\udotaccent{l}}% + \DeclareUnicodeCharacter{1E3A}{\ubaraccent{L}}% + \DeclareUnicodeCharacter{1E3B}{\ubaraccent{l}}% + \DeclareUnicodeCharacter{1E3E}{\'M}% + \DeclareUnicodeCharacter{1E3F}{\'m}% + % + \DeclareUnicodeCharacter{1E40}{\dotaccent{M}}% + \DeclareUnicodeCharacter{1E41}{\dotaccent{m}}% + \DeclareUnicodeCharacter{1E42}{\udotaccent{M}}% + \DeclareUnicodeCharacter{1E43}{\udotaccent{m}}% + \DeclareUnicodeCharacter{1E44}{\dotaccent{N}}% + \DeclareUnicodeCharacter{1E45}{\dotaccent{n}}% + \DeclareUnicodeCharacter{1E46}{\udotaccent{N}}% + \DeclareUnicodeCharacter{1E47}{\udotaccent{n}}% + \DeclareUnicodeCharacter{1E48}{\ubaraccent{N}}% + \DeclareUnicodeCharacter{1E49}{\ubaraccent{n}}% + % + \DeclareUnicodeCharacter{1E54}{\'P}% + \DeclareUnicodeCharacter{1E55}{\'p}% + \DeclareUnicodeCharacter{1E56}{\dotaccent{P}}% + \DeclareUnicodeCharacter{1E57}{\dotaccent{p}}% + \DeclareUnicodeCharacter{1E58}{\dotaccent{R}}% + \DeclareUnicodeCharacter{1E59}{\dotaccent{r}}% + 
\DeclareUnicodeCharacter{1E5A}{\udotaccent{R}}% + \DeclareUnicodeCharacter{1E5B}{\udotaccent{r}}% + \DeclareUnicodeCharacter{1E5E}{\ubaraccent{R}}% + \DeclareUnicodeCharacter{1E5F}{\ubaraccent{r}}% + % + \DeclareUnicodeCharacter{1E60}{\dotaccent{S}}% + \DeclareUnicodeCharacter{1E61}{\dotaccent{s}}% + \DeclareUnicodeCharacter{1E62}{\udotaccent{S}}% + \DeclareUnicodeCharacter{1E63}{\udotaccent{s}}% + \DeclareUnicodeCharacter{1E6A}{\dotaccent{T}}% + \DeclareUnicodeCharacter{1E6B}{\dotaccent{t}}% + \DeclareUnicodeCharacter{1E6C}{\udotaccent{T}}% + \DeclareUnicodeCharacter{1E6D}{\udotaccent{t}}% + \DeclareUnicodeCharacter{1E6E}{\ubaraccent{T}}% + \DeclareUnicodeCharacter{1E6F}{\ubaraccent{t}}% + % + \DeclareUnicodeCharacter{1E7C}{\~V}% + \DeclareUnicodeCharacter{1E7D}{\~v}% + \DeclareUnicodeCharacter{1E7E}{\udotaccent{V}}% + \DeclareUnicodeCharacter{1E7F}{\udotaccent{v}}% + % + \DeclareUnicodeCharacter{1E80}{\`W}% + \DeclareUnicodeCharacter{1E81}{\`w}% + \DeclareUnicodeCharacter{1E82}{\'W}% + \DeclareUnicodeCharacter{1E83}{\'w}% + \DeclareUnicodeCharacter{1E84}{\"W}% + \DeclareUnicodeCharacter{1E85}{\"w}% + \DeclareUnicodeCharacter{1E86}{\dotaccent{W}}% + \DeclareUnicodeCharacter{1E87}{\dotaccent{w}}% + \DeclareUnicodeCharacter{1E88}{\udotaccent{W}}% + \DeclareUnicodeCharacter{1E89}{\udotaccent{w}}% + \DeclareUnicodeCharacter{1E8A}{\dotaccent{X}}% + \DeclareUnicodeCharacter{1E8B}{\dotaccent{x}}% + \DeclareUnicodeCharacter{1E8C}{\"X}% + \DeclareUnicodeCharacter{1E8D}{\"x}% + \DeclareUnicodeCharacter{1E8E}{\dotaccent{Y}}% + \DeclareUnicodeCharacter{1E8F}{\dotaccent{y}}% + % + \DeclareUnicodeCharacter{1E90}{\^Z}% + \DeclareUnicodeCharacter{1E91}{\^z}% + \DeclareUnicodeCharacter{1E92}{\udotaccent{Z}}% + \DeclareUnicodeCharacter{1E93}{\udotaccent{z}}% + \DeclareUnicodeCharacter{1E94}{\ubaraccent{Z}}% + \DeclareUnicodeCharacter{1E95}{\ubaraccent{z}}% + \DeclareUnicodeCharacter{1E96}{\ubaraccent{h}}% + \DeclareUnicodeCharacter{1E97}{\"t}% + 
\DeclareUnicodeCharacter{1E98}{\ringaccent{w}}% + \DeclareUnicodeCharacter{1E99}{\ringaccent{y}}% + % + \DeclareUnicodeCharacter{1EA0}{\udotaccent{A}}% + \DeclareUnicodeCharacter{1EA1}{\udotaccent{a}}% + % + \DeclareUnicodeCharacter{1EB8}{\udotaccent{E}}% + \DeclareUnicodeCharacter{1EB9}{\udotaccent{e}}% + \DeclareUnicodeCharacter{1EBC}{\~E}% + \DeclareUnicodeCharacter{1EBD}{\~e}% + % + \DeclareUnicodeCharacter{1ECA}{\udotaccent{I}}% + \DeclareUnicodeCharacter{1ECB}{\udotaccent{i}}% + \DeclareUnicodeCharacter{1ECC}{\udotaccent{O}}% + \DeclareUnicodeCharacter{1ECD}{\udotaccent{o}}% + % + \DeclareUnicodeCharacter{1EE4}{\udotaccent{U}}% + \DeclareUnicodeCharacter{1EE5}{\udotaccent{u}}% + % + \DeclareUnicodeCharacter{1EF2}{\`Y}% + \DeclareUnicodeCharacter{1EF3}{\`y}% + \DeclareUnicodeCharacter{1EF4}{\udotaccent{Y}}% + % + \DeclareUnicodeCharacter{1EF8}{\~Y}% + \DeclareUnicodeCharacter{1EF9}{\~y}% + % + % Punctuation + \DeclareUnicodeCharacter{2013}{--}% + \DeclareUnicodeCharacter{2014}{---}% + \DeclareUnicodeCharacter{2018}{\quoteleft{}}% + \DeclareUnicodeCharacter{2019}{\quoteright{}}% + \DeclareUnicodeCharacter{201A}{\quotesinglbase{}}% + \DeclareUnicodeCharacter{201C}{\quotedblleft{}}% + \DeclareUnicodeCharacter{201D}{\quotedblright{}}% + \DeclareUnicodeCharacter{201E}{\quotedblbase{}}% + \DeclareUnicodeCharacter{2020}{\ensuremath\dagger}% + \DeclareUnicodeCharacter{2021}{\ensuremath\ddagger}% + \DeclareUnicodeCharacter{2022}{\bullet{}}% + \DeclareUnicodeCharacter{202F}{\thinspace}% + \DeclareUnicodeCharacter{2026}{\dots{}}% + \DeclareUnicodeCharacter{2039}{\guilsinglleft{}}% + \DeclareUnicodeCharacter{203A}{\guilsinglright{}}% + % + \DeclareUnicodeCharacter{20AC}{\euro{}}% + % + \DeclareUnicodeCharacter{2192}{\expansion{}}% + \DeclareUnicodeCharacter{21D2}{\result{}}% + % + % Mathematical symbols + \DeclareUnicodeCharacter{2200}{\ensuremath\forall}% + \DeclareUnicodeCharacter{2203}{\ensuremath\exists}% + \DeclareUnicodeCharacter{2208}{\ensuremath\in}% + 
\DeclareUnicodeCharacter{2212}{\minus{}}% + \DeclareUnicodeCharacter{2217}{\ast}% + \DeclareUnicodeCharacter{221E}{\ensuremath\infty}% + \DeclareUnicodeCharacter{2225}{\ensuremath\parallel}% + \DeclareUnicodeCharacter{2227}{\ensuremath\wedge}% + \DeclareUnicodeCharacter{2229}{\ensuremath\cap}% + \DeclareUnicodeCharacter{2261}{\equiv{}}% + \DeclareUnicodeCharacter{2264}{\ensuremath\leq}% + \DeclareUnicodeCharacter{2265}{\ensuremath\geq}% + \DeclareUnicodeCharacter{2282}{\ensuremath\subset}% + \DeclareUnicodeCharacter{2287}{\ensuremath\supseteq}% + % + \DeclareUnicodeCharacter{2016}{\ensuremath\Vert}% + \DeclareUnicodeCharacter{2032}{\ensuremath\prime}% + \DeclareUnicodeCharacter{210F}{\ensuremath\hbar}% + \DeclareUnicodeCharacter{2111}{\ensuremath\Im}% + \DeclareUnicodeCharacter{2113}{\ensuremath\ell}% + \DeclareUnicodeCharacter{2118}{\ensuremath\wp}% + \DeclareUnicodeCharacter{211C}{\ensuremath\Re}% + \DeclareUnicodeCharacter{2127}{\ensuremath\mho}% + \DeclareUnicodeCharacter{2135}{\ensuremath\aleph}% + \DeclareUnicodeCharacter{2190}{\ensuremath\leftarrow}% + \DeclareUnicodeCharacter{2191}{\ensuremath\uparrow}% + \DeclareUnicodeCharacter{2193}{\ensuremath\downarrow}% + \DeclareUnicodeCharacter{2194}{\ensuremath\leftrightarrow}% + \DeclareUnicodeCharacter{2195}{\ensuremath\updownarrow}% + \DeclareUnicodeCharacter{2196}{\ensuremath\nwarrow}% + \DeclareUnicodeCharacter{2197}{\ensuremath\nearrow}% + \DeclareUnicodeCharacter{2198}{\ensuremath\searrow}% + \DeclareUnicodeCharacter{2199}{\ensuremath\swarrow}% + \DeclareUnicodeCharacter{21A6}{\ensuremath\mapsto}% + \DeclareUnicodeCharacter{21A9}{\ensuremath\hookleftarrow}% + \DeclareUnicodeCharacter{21AA}{\ensuremath\hookrightarrow}% + \DeclareUnicodeCharacter{21BC}{\ensuremath\leftharpoonup}% + \DeclareUnicodeCharacter{21BD}{\ensuremath\leftharpoondown}% + \DeclareUnicodeCharacter{21BE}{\ensuremath\upharpoonright}% + \DeclareUnicodeCharacter{21C0}{\ensuremath\rightharpoonup}% + 
\DeclareUnicodeCharacter{21C1}{\ensuremath\rightharpoondown}% + \DeclareUnicodeCharacter{21CC}{\ensuremath\rightleftharpoons}% + \DeclareUnicodeCharacter{21D0}{\ensuremath\Leftarrow}% + \DeclareUnicodeCharacter{21D1}{\ensuremath\Uparrow}% + \DeclareUnicodeCharacter{21D3}{\ensuremath\Downarrow}% + \DeclareUnicodeCharacter{21D4}{\ensuremath\Leftrightarrow}% + \DeclareUnicodeCharacter{21D5}{\ensuremath\Updownarrow}% + \DeclareUnicodeCharacter{21DD}{\ensuremath\leadsto}% + \DeclareUnicodeCharacter{2201}{\ensuremath\complement}% + \DeclareUnicodeCharacter{2202}{\ensuremath\partial}% + \DeclareUnicodeCharacter{2205}{\ensuremath\emptyset}% + \DeclareUnicodeCharacter{2207}{\ensuremath\nabla}% + \DeclareUnicodeCharacter{2209}{\ensuremath\notin}% + \DeclareUnicodeCharacter{220B}{\ensuremath\owns}% + \DeclareUnicodeCharacter{220F}{\ensuremath\prod}% + \DeclareUnicodeCharacter{2210}{\ensuremath\coprod}% + \DeclareUnicodeCharacter{2211}{\ensuremath\sum}% + \DeclareUnicodeCharacter{2213}{\ensuremath\mp}% + \DeclareUnicodeCharacter{2218}{\ensuremath\circ}% + \DeclareUnicodeCharacter{221A}{\ensuremath\surd}% + \DeclareUnicodeCharacter{221D}{\ensuremath\propto}% + \DeclareUnicodeCharacter{2220}{\ensuremath\angle}% + \DeclareUnicodeCharacter{2223}{\ensuremath\mid}% + \DeclareUnicodeCharacter{2228}{\ensuremath\vee}% + \DeclareUnicodeCharacter{222A}{\ensuremath\cup}% + \DeclareUnicodeCharacter{222B}{\ensuremath\smallint}% + \DeclareUnicodeCharacter{222E}{\ensuremath\oint}% + \DeclareUnicodeCharacter{223C}{\ensuremath\sim}% + \DeclareUnicodeCharacter{2240}{\ensuremath\wr}% + \DeclareUnicodeCharacter{2243}{\ensuremath\simeq}% + \DeclareUnicodeCharacter{2245}{\ensuremath\cong}% + \DeclareUnicodeCharacter{2248}{\ensuremath\approx}% + \DeclareUnicodeCharacter{224D}{\ensuremath\asymp}% + \DeclareUnicodeCharacter{2250}{\ensuremath\doteq}% + \DeclareUnicodeCharacter{2260}{\ensuremath\neq}% + \DeclareUnicodeCharacter{226A}{\ensuremath\ll}% + \DeclareUnicodeCharacter{226B}{\ensuremath\gg}% + 
\DeclareUnicodeCharacter{227A}{\ensuremath\prec}% + \DeclareUnicodeCharacter{227B}{\ensuremath\succ}% + \DeclareUnicodeCharacter{2283}{\ensuremath\supset}% + \DeclareUnicodeCharacter{2286}{\ensuremath\subseteq}% + \DeclareUnicodeCharacter{228E}{\ensuremath\uplus}% + \DeclareUnicodeCharacter{228F}{\ensuremath\sqsubset}% + \DeclareUnicodeCharacter{2290}{\ensuremath\sqsupset}% + \DeclareUnicodeCharacter{2291}{\ensuremath\sqsubseteq}% + \DeclareUnicodeCharacter{2292}{\ensuremath\sqsupseteq}% + \DeclareUnicodeCharacter{2293}{\ensuremath\sqcap}% + \DeclareUnicodeCharacter{2294}{\ensuremath\sqcup}% + \DeclareUnicodeCharacter{2295}{\ensuremath\oplus}% + \DeclareUnicodeCharacter{2296}{\ensuremath\ominus}% + \DeclareUnicodeCharacter{2297}{\ensuremath\otimes}% + \DeclareUnicodeCharacter{2298}{\ensuremath\oslash}% + \DeclareUnicodeCharacter{2299}{\ensuremath\odot}% + \DeclareUnicodeCharacter{22A2}{\ensuremath\vdash}% + \DeclareUnicodeCharacter{22A3}{\ensuremath\dashv}% + \DeclareUnicodeCharacter{22A4}{\ensuremath\ptextop}% + \DeclareUnicodeCharacter{22A5}{\ensuremath\bot}% + \DeclareUnicodeCharacter{22A8}{\ensuremath\models}% + \DeclareUnicodeCharacter{22B4}{\ensuremath\unlhd}% + \DeclareUnicodeCharacter{22B5}{\ensuremath\unrhd}% + \DeclareUnicodeCharacter{22C0}{\ensuremath\bigwedge}% + \DeclareUnicodeCharacter{22C1}{\ensuremath\bigvee}% + \DeclareUnicodeCharacter{22C2}{\ensuremath\bigcap}% + \DeclareUnicodeCharacter{22C3}{\ensuremath\bigcup}% + \DeclareUnicodeCharacter{22C4}{\ensuremath\diamond}% + \DeclareUnicodeCharacter{22C5}{\ensuremath\cdot}% + \DeclareUnicodeCharacter{22C6}{\ensuremath\star}% + \DeclareUnicodeCharacter{22C8}{\ensuremath\bowtie}% + \DeclareUnicodeCharacter{2308}{\ensuremath\lceil}% + \DeclareUnicodeCharacter{2309}{\ensuremath\rceil}% + \DeclareUnicodeCharacter{230A}{\ensuremath\lfloor}% + \DeclareUnicodeCharacter{230B}{\ensuremath\rfloor}% + \DeclareUnicodeCharacter{2322}{\ensuremath\frown}% + \DeclareUnicodeCharacter{2323}{\ensuremath\smile}% + % + 
\DeclareUnicodeCharacter{25A1}{\ensuremath\Box}% + \DeclareUnicodeCharacter{25B3}{\ensuremath\triangle}% + \DeclareUnicodeCharacter{25B7}{\ensuremath\triangleright}% + \DeclareUnicodeCharacter{25BD}{\ensuremath\bigtriangledown}% + \DeclareUnicodeCharacter{25C1}{\ensuremath\triangleleft}% + \DeclareUnicodeCharacter{25C7}{\ensuremath\Diamond}% + \DeclareUnicodeCharacter{2660}{\ensuremath\spadesuit}% + \DeclareUnicodeCharacter{2661}{\ensuremath\heartsuit}% + \DeclareUnicodeCharacter{2662}{\ensuremath\diamondsuit}% + \DeclareUnicodeCharacter{2663}{\ensuremath\clubsuit}% + \DeclareUnicodeCharacter{266D}{\ensuremath\flat}% + \DeclareUnicodeCharacter{266E}{\ensuremath\natural}% + \DeclareUnicodeCharacter{266F}{\ensuremath\sharp}% + \DeclareUnicodeCharacter{26AA}{\ensuremath\bigcirc}% + \DeclareUnicodeCharacter{27B9}{\ensuremath\rangle}% + \DeclareUnicodeCharacter{27C2}{\ensuremath\perp}% + \DeclareUnicodeCharacter{27E8}{\ensuremath\langle}% + \DeclareUnicodeCharacter{27F5}{\ensuremath\longleftarrow}% + \DeclareUnicodeCharacter{27F6}{\ensuremath\longrightarrow}% + \DeclareUnicodeCharacter{27F7}{\ensuremath\longleftrightarrow}% + \DeclareUnicodeCharacter{27FC}{\ensuremath\longmapsto}% + \DeclareUnicodeCharacter{29F5}{\ensuremath\setminus}% + \DeclareUnicodeCharacter{2A00}{\ensuremath\bigodot}% + \DeclareUnicodeCharacter{2A01}{\ensuremath\bigoplus}% + \DeclareUnicodeCharacter{2A02}{\ensuremath\bigotimes}% + \DeclareUnicodeCharacter{2A04}{\ensuremath\biguplus}% + \DeclareUnicodeCharacter{2A06}{\ensuremath\bigsqcup}% + \DeclareUnicodeCharacter{2A1D}{\ensuremath\Join}% + \DeclareUnicodeCharacter{2A3F}{\ensuremath\amalg}% + \DeclareUnicodeCharacter{2AAF}{\ensuremath\preceq}% + \DeclareUnicodeCharacter{2AB0}{\ensuremath\succeq}% + % + \global\mathchardef\checkmark="1370% actually the square root sign + \DeclareUnicodeCharacter{2713}{\ensuremath\checkmark}% +}% end of \unicodechardefs + +% UTF-8 byte sequence (pdfTeX) definitions (replacing and @U command) +% It makes the setting 
+% that replaces UTF-8 byte sequences.
+\def\utfeightchardefs{%
+  \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterUTFviii
+  \unicodechardefs
+}
+
+% Whether the active definitions of non-ASCII characters expand to
+% non-active tokens with the same character code.  This is used to
+% write characters literally, instead of using active definitions for
+% printing the correct glyphs.
+\newif\ifpassthroughchars
+\passthroughcharsfalse
+
+% For native Unicode handling (XeTeX and LuaTeX),
+% provide a definition macro to replace/pass-through a Unicode character.
+%
+% #1 is the code point written as hexadecimal digits (it is always read
+% as "#1); #2 is the replacement text used for printing.
+%
+\def\DeclareUnicodeCharacterNative#1#2{%
+  \catcode"#1=\active
+  % Helper: ##1 is the character itself as a token, ##2 the hexadecimal
+  % code point and ##3 the replacement text.  With the \uccode of `~' set
+  % to the code point, \uppercase turns the `~' after \gdef into the
+  % character being defined (this relies on `~' being an active character
+  % at this point, as it is in plain TeX).  The definition is global; it
+  % expands to ##1 when \ifpassthroughchars is true and to ##3 otherwise.
+  \def\dodeclareunicodecharacternative##1##2##3{%
+    \begingroup
+      \uccode`\~="##2\relax
+      \uppercase{\gdef~}{%
+        \ifpassthroughchars
+          ##1%
+        \else
+          ##3%
+        \fi
+      }
+    \endgroup
+  }
+  % Build the pass-through token: \uppercase maps a `.' to the character
+  % with code "#1 (keeping the catcode of `.') and stores it in
+  % \UTFNativeTmp, which is expanded once to become the helper's ##1.
+  \begingroup
+    \uccode`\.="#1\relax
+    \uppercase{\def\UTFNativeTmp{.}}%
+    \expandafter\dodeclareunicodecharacternative\UTFNativeTmp{#1}{#2}%
+  \endgroup
+}
+
+% Native Unicode handling (XeTeX and LuaTeX) character replacing definition.
+% It activates the setting that replaces Unicode characters.
+\def\nativeunicodechardefs{%
+  \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNative
+  \unicodechardefs
+}
+
+% For native Unicode handling (XeTeX and LuaTeX),
+% make the character token expand
+% to the sequences given in \unicodechardefs for printing.
+% Concretely: the control sequence named `uni:#1' is set, via \globallet,
+% to a macro whose expansion is #2.  (\globallet is defined elsewhere in
+% this file; presumably it is \global\let.)
+% NOTE(review): there is no `%' after `\def\UTFAtUTmp{#2}', so the macro
+% body contains a space token there; harmless as long as this only runs
+% in vertical mode -- confirm.
+\def\DeclareUnicodeCharacterNativeAtU#1#2{%
+  \def\UTFAtUTmp{#2}
+  \expandafter\globallet\csname uni:#1\endcsname \UTFAtUTmp
+}
+
+% @U command definitions for native Unicode handling (XeTeX and LuaTeX).
+\def\nativeunicodechardefsatu{%
+  \let\DeclareUnicodeCharacter\DeclareUnicodeCharacterNativeAtU
+  \unicodechardefs
+}
+
+% US-ASCII character definitions.
+\def\asciichardefs{% nothing need be done
+  \relax
+}
+
+% define all Unicode characters we know about, for the sake of @U.
+% With a native-Unicode engine only the @U definitions are installed
+% here; otherwise the UTF-8 byte sequence definitions are installed.
+\iftxinativeunicodecapable
+  \nativeunicodechardefsatu
+\else
+  \utfeightchardefs
+\fi
+
+
+% Make non-ASCII characters printable again for compatibility with
+% existing Texinfo documents that may use them, even without declaring a
+% document encoding.
+%
+\setnonasciicharscatcode \other
+
+
+\message{formatting,}
+
+\newdimen\defaultparindent \defaultparindent = 15pt
+
+\chapheadingskip = 15pt plus 4pt minus 2pt
+\secheadingskip = 12pt plus 3pt minus 2pt
+\subsecheadingskip = 9pt plus 2pt minus 2pt
+
+% Prevent underfull vbox error messages.
+\vbadness = 10000
+
+% Don't be very finicky about underfull hboxes, either.
+\hbadness = 6666
+
+% Following George Bush, get rid of widows and orphans.
+% (A penalty of 10000 forbids the corresponding page break entirely.)
+\widowpenalty=10000
+\clubpenalty=10000
+
+% Use TeX 3.0's \emergencystretch to help line breaking, but if we're
+% using an old version of TeX, don't do anything.  We want the amount of
+% stretch added to depend on the line length, hence the dependence on
+% \hsize.  We call this whenever the paper size is set.
+%
+% The \ifx test is true when \emergencystretch has the same meaning as
+% \thisisundefined (assumed never to be defined anywhere -- confirm).
+% In that case \emergencystretch is made an alias for the scratch
+% register \dimen0 and nothing is assigned.
+%
+\def\setemergencystretch{%
+  \ifx\emergencystretch\thisisundefined
+    % Allow us to assign to \emergencystretch anyway.
+    \def\emergencystretch{\dimen0}%
+  \else
+    \emergencystretch = .15\hsize
+  \fi
+}
+
+% Parameters in order: 1) textheight; 2) textwidth;
+% 3) voffset; 4) hoffset; 5) binding offset; 6) topskip;
+% 7) physical page height; 8) physical page width.
+%
+% We also call \setleading{\textleading}, so the caller should define
+% \textleading.  The caller should also set \parskip.
+%
+\def\internalpagesizes#1#2#3#4#5#6#7#8{%
+  \voffset = #3\relax
+  \topskip = #6\relax
+  \splittopskip = \topskip
+  %
+  % Vertical size: text height (#1) plus \topskip; the outer size adds
+  % \topandbottommargin twice.
+  \vsize = #1\relax
+  \advance\vsize by \topskip
+  \outervsize = \vsize
+  \advance\outervsize by 2\topandbottommargin
+  \txipageheight = \vsize
+  %
+  % Horizontal size: text width (#2); the outer size is 0.5in wider.
+  \hsize = #2\relax
+  \outerhsize = \hsize
+  \advance\outerhsize by 0.5in
+  \txipagewidth = \hsize
+  %
+  % The hoffset parameter (#4) is stored in \normaloffset.
+  \normaloffset = #4\relax
+  \bindingoffset = #5\relax
+  %
+  % Physical page size (#7 high, #8 wide), set for pdfTeX and for XeTeX.
+  \ifpdf
+    \pdfpageheight #7\relax
+    \pdfpagewidth #8\relax
+    % if we don't reset these, they will remain at "1 true in" of
+    % whatever layout pdftex was dumped with.
+    \pdfhorigin = 1 true in
+    \pdfvorigin = 1 true in
+  \else
+    \ifx\XeTeXrevision\thisisundefined
+    \else
+      \pdfpageheight #7\relax
+      \pdfpagewidth #8\relax
+      % XeTeX does not have \pdfhorigin and \pdfvorigin.
+    \fi
+  \fi
+  %
+  \setleading{\textleading}
+  %
+  \parindent = \defaultparindent
+  \setemergencystretch
+}
+
+% @letterpaper (the default).
+% The body is wrapped in an extra group: \globaldefs = 1 makes every
+% assignment inside it global, and the setting of \globaldefs itself is
+% undone when the group ends.
+\def\letterpaper{{\globaldefs = 1
+  \parskip = 3pt plus 2pt minus 1pt
+  \textleading = 13.2pt
+  %
+  % If page is nothing but text, make it come out even.
+  \internalpagesizes{607.2pt}{6in}% that's 46 lines
+                    {\voffset}{.25in}%
+                    {\bindingoffset}{36pt}%
+                    {11in}{8.5in}%
+}}
+
+% Use @smallbook to reset parameters for 7x9.25 trim size.
+% Besides the page geometry this also tightens a few layout parameters
+% (\lispnarrowing, \tolerance, \hfuzz, \contentsrightmargin,
+% \defbodyindent).
+\def\smallbook{{\globaldefs = 1
+  \parskip = 2pt plus 1pt
+  \textleading = 12pt
+  %
+  \internalpagesizes{7.5in}{5in}%
+                    {-.2in}{0in}%
+                    {\bindingoffset}{16pt}%
+                    {9.25in}{7in}%
+  %
+  \lispnarrowing = 0.3in
+  \tolerance = 700
+  \hfuzz = 1pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = .5cm
+}}
+
+% Use @smallerbook to reset parameters for 6x9 trim size.
+% (Just testing, parameters still in flux.)
+% As with the other paper-size commands, the extra group confines
+% \globaldefs = 1, which makes the assignments in the body global.
+\def\smallerbook{{\globaldefs = 1
+  \parskip = 1.5pt plus 1pt
+  \textleading = 12pt
+  %
+  \internalpagesizes{7.4in}{4.8in}%
+                    {-.2in}{-.4in}%
+                    {0pt}{14pt}%
+                    {9in}{6in}%
+  %
+  \lispnarrowing = 0.25in
+  \tolerance = 700
+  \hfuzz = 1pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = .4cm
+}}
+
+% Use @afourpaper to print on European A4 paper.
+\def\afourpaper{{\globaldefs = 1
+  \parskip = 3pt plus 2pt minus 1pt
+  \textleading = 13.2pt
+  %
+  % Double-side printing via postscript on Laserjet 4050
+  % prints double-sided nicely when \bindingoffset=10mm and \hoffset=-6mm.
+  % To change the settings for a different printer or situation, adjust
+  % \normaloffset until the front-side and back-side texts align.  Then
+  % do the same for \bindingoffset.  You can set these for testing in
+  % your texinfo source file like this:
+  % @tex
+  % \global\normaloffset = -6mm
+  % \global\bindingoffset = 10mm
+  % @end tex
+  \internalpagesizes{673.2pt}{160mm}% that's 51 lines
+                    {\voffset}{\hoffset}%
+                    {\bindingoffset}{44pt}%
+                    {297mm}{210mm}%
+  %
+  \tolerance = 700
+  \hfuzz = 1pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = 5mm
+}}
+
+% Use @afivepaper to print on European A5 paper.
+% From romildo@urano.iceb.ufop.br, 2 July 2000.
+% He also recommends making @example and @lisp be small.
+\def\afivepaper{{\globaldefs = 1
+  \parskip = 2pt plus 1pt minus 0.1pt
+  \textleading = 12.5pt
+  %
+  \internalpagesizes{160mm}{120mm}%
+                    {\voffset}{\hoffset}%
+                    {\bindingoffset}{8pt}%
+                    {210mm}{148mm}%
+  %
+  \lispnarrowing = 0.2in
+  \tolerance = 800
+  \hfuzz = 1.2pt
+  \contentsrightmargin = 0pt
+  \defbodyindent = 2mm
+  \tableindent = 12mm
+}}
+
+% A specific text layout, 24x15cm overall, intended for A4 paper.
+% Starts from the \afourpaper settings and then overrides the page
+% geometry with a second \internalpagesizes call.
+\def\afourlatex{{\globaldefs = 1
+  \afourpaper
+  \internalpagesizes{237mm}{150mm}%
+                    {\voffset}{4.6mm}%
+                    {\bindingoffset}{7mm}%
+                    {297mm}{210mm}%
+  %
+  % Must explicitly reset to 0 because we call \afourpaper.
+  % (\afourpaper executes its own `\globaldefs = 1' while \globaldefs is
+  % already positive, which makes that assignment global.)
+  \globaldefs = 0
+}}
+
+% Use @afourwide to print on A4 paper in landscape format.
+% Starts from \afourpaper and overrides the page geometry; \globaldefs is
+% reset explicitly for the same reason as in \afourlatex.
+% NOTE(review): the physical page size passed here is 297mm high by 210mm
+% wide, the same as \afourpaper -- confirm this is what is intended for
+% the layout described in the comment above this macro.
+\def\afourwide{{\globaldefs = 1
+  \afourpaper
+  \internalpagesizes{241mm}{165mm}%
+                    {\voffset}{-2.95mm}%
+                    {\bindingoffset}{7mm}%
+                    {297mm}{210mm}%
+  \globaldefs = 0
+}}
+
+% @pagesizes TEXTHEIGHT[,TEXTWIDTH]
+% Perhaps we should allow setting the margins, \topskip, \parskip,
+% and/or leading, also.  Or perhaps we should compute them somehow.
+%
+% \pagesizesyyy receives the height as #1 and the optional width as #2.
+% \hsize is changed only when #2 typesets to something wider than 0pt,
+% i.e. when a width was actually given.  The physical page height passed
+% on is #1 + \voffset (in \dimen0) and the physical page width is
+% \hsize + \normaloffset (in \dimen2).
+%
+\parseargdef\pagesizes{\pagesizesyyy #1,,\finish}
+\def\pagesizesyyy#1,#2,#3\finish{{%
+  \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \hsize=#2\relax \fi
+  \globaldefs = 1
+  %
+  \parskip = 3pt plus 2pt minus 1pt
+  \setleading{\textleading}%
+  %
+  \dimen0 = #1\relax
+  \advance\dimen0 by \voffset
+  %
+  \dimen2 = \hsize
+  \advance\dimen2 by \normaloffset
+  %
+  \internalpagesizes{#1}{\hsize}%
+                    {\voffset}{\normaloffset}%
+                    {\bindingoffset}{44pt}%
+                    {\dimen0}{\dimen2}%
+}}
+
+% Set default to letter.
+%
+\letterpaper
+
+
+\message{and turning on texinfo input format.}
+
+\def^^L{\par} % remove \outer, so ^L can appear in an @comment
+
+% DEL is a comment character, in case @c does not suffice.
+\catcode`\^^? = 14
+
+% Define macros to output various characters with catcode for normal text.
+% Each character is first given catcode `other', so the token stored in
+% the corresponding \normal... macro is a plain, non-active character.
+\catcode`\"=\other \def\normaldoublequote{"}
+\catcode`\$=\other \def\normaldollar{$}%$ font-lock fix
+\catcode`\+=\other \def\normalplus{+}
+\catcode`\<=\other \def\normalless{<}
+\catcode`\>=\other \def\normalgreater{>}
+\catcode`\^=\other \def\normalcaret{^}
+\catcode`\_=\other \def\normalunderscore{_}
+\catcode`\|=\other \def\normalverticalbar{|}
+\catcode`\~=\other \def\normaltilde{~}
+
+% This macro is used to make a character print one way in \tt
+% (where it can probably be output as-is), and another way in other fonts,
+% where something hairier probably needs to be done.
+%
+% #1 is what to print if we are indeed using \tt; #2 is what to print
+% otherwise.
+% Since all the Computer Modern typewriter fonts have zero
+% interword stretch (and shrink), and it is reasonable to expect all
+% typewriter fonts to have this, we can check that font parameter.
+%
+\def\ifusingtt#1#2{\ifdim \fontdimen3\font=0pt #1\else #2\fi}
+
+% Same as above, but check for italic font.  Actually this also catches
+% non-italic slanted fonts since it is impossible to distinguish them from
+% italic fonts.  But since this is only used by $ and it uses \sl anyway
+% this is not a problem.
+% (The test is on \fontdimen1, the slant parameter of the current font.)
+\def\ifusingit#1#2{\ifdim \fontdimen1\font>0pt #1\else #2\fi}
+
+% Set catcodes for Texinfo file
+
+% Active characters for printing the wanted glyph.
+% Most of these we simply print from the \tt font, but for some, we can
+% use math or other variants that look better in normal text.
+%
+\catcode`\"=\active
+\def\activedoublequote{{\tt\char34}}
+\let"=\activedoublequote
+\catcode`\~=\active \def\activetilde{{\tt\char126}} \let~ = \activetilde
+\chardef\hatchar=`\^
+\catcode`\^=\active \def\activehat{{\tt \hatchar}} \let^ = \activehat
+
+% Underscore: the plain character when the current font is typewriter,
+% otherwise \_, which draws a short rule.
+\catcode`\_=\active
+\def_{\ifusingtt\normalunderscore\_}
+\def\_{\leavevmode \kern.07em \vbox{\hrule width.3em height.1ex}\kern .07em }
+\let\realunder=_
+
+\catcode`\|=\active \def|{{\tt\char124}}
+
+\chardef \less=`\<
+\catcode`\<=\active \def\activeless{{\tt \less}}\let< = \activeless
+\chardef \gtr=`\>
+\catcode`\>=\active \def\activegtr{{\tt \gtr}}\let> = \activegtr
+\catcode`\+=\active \def+{{\tt \char 43}}
+% Dollar: a slanted \$ when the current font is italic or slanted,
+% otherwise the plain character.
+\catcode`\$=\active \def${\ifusingit{{\sl\$}}\normaldollar}%$ font-lock fix
+\catcode`\-=\active \let-=\normaldash
+
+
+% used for headline/footline in the output routine, in case the page
+% breaks in the middle of an @tex block.
+% Restores the Texinfo meanings of <, >, ~ and ^, the default quote
+% setup, and \b and \i.
+\def\texinfochars{%
+  \let< = \activeless
+  \let> = \activegtr
+  \let~ = \activetilde
+  \let^ = \activehat
+  \markupsetuplqdefault \markupsetuprqdefault
+  \let\b = \strong
+  \let\i = \smartitalic
+  % in principle, all other definitions in \tex have to be undone too.
+}
+
+% Used sometimes to turn off (effectively) the active characters even after
+% parsing them.
+\def\turnoffactive{%
+  \normalturnoffactive
+  \otherbackslash
+}
+
+% Catcode 0 is the escape category: from here on @ starts control
+% sequences, alongside \ until \ is made active below.
+\catcode`\@=0
+
+% \backslashcurfont outputs one backslash character in current font,
+% as in \char`\\.
+\global\chardef\backslashcurfont=`\\
+\global\let\rawbackslashxx=\backslashcurfont  % let existing .??s files work
+
+% \realbackslash is an actual character `\' with catcode other, and
+% \doublebackslash is two of them (for the pdf outlines).
+{\catcode`\\=\other @gdef@realbackslash{\} @gdef@doublebackslash{\\}}
+
+% In Texinfo, backslash is an active character; it prints the backslash
+% in fixed width font.
+\catcode`\\=\active  % @ for escape char from now on.
+
+% Print a typewriter backslash.  For math mode, we can't simply use
+% \backslashcurfont: the story here is that in math mode, the \char
+% of \backslashcurfont ends up printing the roman \ from the math symbol
+% font (because \char in math mode uses the \mathcode, and plain.tex
+% sets \mathcode`\\="026E).  Hence we use an explicit \mathchar,
+% which is the decimal equivalent of "715c (class 7, e.g., use \fam;
+% ignored family value; char position "5C).  We can't use " for the
+% usual hex value because it has already been made active.
+
+@def@ttbackslash{{@tt @ifmmode @mathchar29020 @else @backslashcurfont @fi}}
+@let@backslashchar = @ttbackslash % @backslashchar{} is for user documents.
+
+% \rawbackslash defines an active \ to do \backslashcurfont.
+% \otherbackslash defines an active \ to be a literal `\' character with
+% catcode other.  We switch back and forth between these.
+@gdef@rawbackslash{@let\=@backslashcurfont}
+@gdef@otherbackslash{@let\=@realbackslash}
+
+% Same as @turnoffactive except outputs \ as {\tt\char`\\} instead of
+% the literal character `\'.
+%
+% `-' is made active inside the group so that the `-' in the definition
+% below is the active character token.  The macro also switches
+% \ifpassthroughchars to true.
+{@catcode`- = @active
+ @gdef@normalturnoffactive{%
+   @passthroughcharstrue
+   @let-=@normaldash
+   @let"=@normaldoublequote
+   @let$=@normaldollar %$ font-lock fix
+   @let+=@normalplus
+   @let<=@normalless
+   @let>=@normalgreater
+   @let^=@normalcaret
+   @let_=@normalunderscore
+   @let|=@normalverticalbar
+   @let~=@normaltilde
+   @let\=@ttbackslash
+   @markupsetuplqdefault
+   @markupsetuprqdefault
+   @unsepspaces
+ }
+}
+
+% If a .fmt file is being used, characters that might appear in a file
+% name cannot be active until we have parsed the command line.
+% So turn them off again, and have @fixbackslash turn them back on.
+@catcode`+=@other @catcode`@_=@other
+
+% \enablebackslashhack - allow file to begin `\input texinfo'
+%
+% If a .fmt file is being used, we don't want the `\input texinfo' to show up.
+% That is what \eatinput is for; after that, the `\' should revert to printing
+% a backslash.
+% If the file did not have a `\input texinfo', then it is turned off after
+% the first line; otherwise the first `\' in the file would cause an error.
+% This is used on the very last line of this file, texinfo.tex.
+% We also use @c to call @fixbackslash, in case ends of lines are hidden.
+%
+% Inside the two groups below the end-of-line character is active
+% (catcode 13), which is why every line there ends with `%'.
+{
+@catcode`@^=7
+@catcode`@^^M=13@gdef@enablebackslashhack{%
+  @global@let\ = @eatinput%
+  @catcode`@^^M=13%
+  @def@c{@fixbackslash@c}%
+  % Definition for the newline at the end of this file.
+  @def ^^M{@let^^M@secondlinenl}%
+  % Definition for a newline in the main Texinfo file.
+  @gdef @secondlinenl{@fixbackslash}%
+}}
+
+{@catcode`@^=7 @catcode`@^^M=13%
+@gdef@eatinput input texinfo#1^^M{@fixbackslash}}
+
+% Emergency active definition of newline, in case an active newline token
+% appears by mistake.
+% (Character 13 is the end-of-line character; it is active inside this
+% group, so every line of the definition ends with `%'.  The emergency
+% definition simply ends the paragraph.)
+{@catcode`@^=7 @catcode13=13%
+@gdef@enableemergencynewline{%
+  @gdef^^M{%
+    @par%
+    %<warning: active newline>@par%
+}}}
+
+
+% Undo the first-line hacks: if \ still means @eatinput, make it print a
+% typewriter backslash; give the end-of-line character its regular
+% catcode; install the emergency newline definition; set @c to
+% @texinfoc (defined elsewhere; presumably the regular comment command);
+% make + and _ active again; finally read texinfo.cnf if it can be
+% opened.
+@gdef@fixbackslash{%
+  @ifx\@eatinput @let\ = @ttbackslash @fi
+  @catcode13=5 % regular end of line
+  @enableemergencynewline
+  @let@c=@texinfoc
+  % Also turn back on active characters that might appear in the input
+  % file name, in case not using a pre-dumped format.
+  @catcode`+=@active
+  @catcode`@_=@active
+  %
+  % If texinfo.cnf is present on the system, read it.
+  % Useful for site-wide @afourpaper, etc.  This macro, @fixbackslash, gets
+  % called at the beginning of every Texinfo file.  Not opening texinfo.cnf
+  % directly in this file, texinfo.tex, makes it possible to make a format
+  % file for Texinfo.
+  %
+  @openin 1 texinfo.cnf
+  @ifeof 1 @else @input texinfo.cnf @fi
+  @closein 1
+}
+
+
+% Say @foo, not \foo, in error messages.
+@escapechar = `@@
+
+% These (along with & and #) are made active for url-breaking, so need
+% active definitions as the normal characters.
+@def@normaldot{.}
+@def@normalquest{?}
+@def@normalslash{/}
+
+% These look ok in all fonts, so just make them not special.
+% @hashchar{} gets its own user-level command, because of #line.
+% Note that once the third line below has run, `%' no longer starts a
+% comment; the remaining comments in this file use @c.
+@catcode`@& = @other @def@normalamp{&}
+@catcode`@# = @other @def@normalhash{#}
+@catcode`@% = @other @def@normalpercent{%}
+
+@let @hashchar = @normalhash
+
+@c Finally, make ` and ' active, so that txicodequoteundirected and
+@c txicodequotebacktick work right in, e.g., @w{@code{`foo'}}.  If we
+@c don't make ` and ' active, @code will not get them as active chars.
+@c Do this last of all since we use ` in the previous @catcode assignments.
+@catcode`@'=@active
+@catcode`@`=@active
+@markupsetuplqdefault
+@markupsetuprqdefault
+
+@c Local variables:
+@c eval: (add-hook 'write-file-hooks 'time-stamp)
+@c page-delimiter: "^\\\\message\\|emacs-page"
+@c time-stamp-start: "def\\\\texinfoversion{"
+@c time-stamp-format: "%:y-%02m-%02d.%02H"
+@c time-stamp-end: "}"
+@c End:
+
+@c vim:sw=2:
+
+@enablebackslashhack
diff --git a/REORG.TODO/manual/texis.awk b/REORG.TODO/manual/texis.awk
new file mode 100644
index 0000000000..153724755d
--- /dev/null
+++ b/REORG.TODO/manual/texis.awk
@@ -0,0 +1,21 @@
+# Print "texis = \" and then "FILE \" for each ARGV file and each file it @includes.
+BEGIN {
+  print "texis = \\";
+  for (arg = 1; arg < ARGC; arg++)
+    {
+      pending[0] = ARGV[arg];		# pending[0..depth]: files being scanned
+      print ARGV[arg], "\\";
+      depth = 0;
+      while (depth >= 0)
+	{
+	  while ((getline < pending[depth]) > 0)	# stops at EOF or on a read error
+	    if ($1 == "@include")
+	      {
+		pending[++depth] = $2;	# descend; later reads come from this file
+		print $2, "\\";
+	      }
+	  close(pending[depth--]);	# finished with this file; resume its includer
+	}
+    }
+  print "";				# empty line ends the backslash continuation
+}
diff --git a/REORG.TODO/manual/threads.texi b/REORG.TODO/manual/threads.texi
new file mode 100644
index 0000000000..d7fac825c8
--- /dev/null
+++ b/REORG.TODO/manual/threads.texi
@@ -0,0 +1,252 @@
+@node POSIX Threads
+@c @node POSIX Threads, Internal Probes, Cryptographic Functions, Top
+@chapter POSIX Threads
+@c %MENU% POSIX Threads
+@cindex pthreads
+
+This chapter describes the @glibcadj{} POSIX Threads implementation.
+
+@menu
+* Thread-specific Data:: Support for creating and
+ managing thread-specific data
+* Non-POSIX Extensions:: Additional functions to extend
+ POSIX Thread functionality
+@end menu
+
+@node Thread-specific Data
+@section Thread-specific Data
+
+The @glibcadj{} implements functions to allow users to create and manage
+data specific to a thread. Such data may be destroyed at thread exit,
+if a destructor is provided.
The following functions are defined: + +@comment pthread.h +@comment POSIX +@deftypefun int pthread_key_create (pthread_key_t *@var{key}, void (*@var{destructor})(void*)) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c pthread_key_create ok +@c KEY_UNUSED ok +@c KEY_USABLE ok +Create a thread-specific data key for the calling thread, referenced by +@var{key}. + +Objects declared with the C++11 @code{thread_local} keyword are destroyed +before thread-specific data, so they should not be used in thread-specific +data destructors or even as members of the thread-specific data, since the +latter is passed as an argument to the destructor function. +@end deftypefun + +@comment pthread.h +@comment POSIX +@deftypefun int pthread_key_delete (pthread_key_t @var{key}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c pthread_key_delete ok +@c This uses atomic compare and exchange to increment the seq number +@c after testing it's not a KEY_UNUSED seq number. +@c KEY_UNUSED dup ok +Destroy the thread-specific data @var{key} in the calling thread. The +destructor for the thread-specific data is not called during destruction, nor +is it called during thread exit. +@end deftypefun + +@comment pthread.h +@comment POSIX +@deftypefun void *pthread_getspecific (pthread_key_t @var{key}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c pthread_getspecific ok +Return the thread-specific data associated with @var{key} in the calling +thread. +@end deftypefun + +@comment pthread.h +@comment POSIX +@deftypefun int pthread_setspecific (pthread_key_t @var{key}, const void *@var{value}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{} @ascuheap{}}@acunsafe{@acucorrupt{} @acsmem{}}} +@c pthread_setspecific @asucorrupt @ascuheap @acucorrupt @acsmem +@c a level2 block may be allocated by a signal handler after +@c another call already made a decision to allocate it, thus losing +@c the allocated value. 
the seq number is updated before the +@c value, which might cause an earlier-generation value to seem +@c current if setspecific is cancelled or interrupted by a signal +@c KEY_UNUSED ok +@c calloc dup @ascuheap @acsmem +Associate the thread-specific @var{value} with @var{key} in the calling thread. +@end deftypefun + + +@node Non-POSIX Extensions +@section Non-POSIX Extensions + +In addition to implementing the POSIX API for threads, @theglibc{} provides +additional functions and interfaces to provide functionality not specified in +the standard. + +@menu +* Default Thread Attributes:: Setting default attributes for + threads in a process. +@end menu + +@node Default Thread Attributes +@subsection Setting Process-wide defaults for thread attributes + +@Theglibc{} provides non-standard API functions to set and get the default +attributes used in the creation of threads in a process. + +@comment pthread.h +@comment GNU +@deftypefun int pthread_getattr_default_np (pthread_attr_t *@var{attr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c Takes lock around read from default_pthread_attr. +Get the default attribute values and set @var{attr} to match. This +function returns @math{0} on success and a non-zero error code on +failure. +@end deftypefun + +@comment pthread.h +@comment GNU +@deftypefun int pthread_setattr_default_np (pthread_attr_t *@var{attr}) +@safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}} +@c pthread_setattr_default_np @ascuheap @asulock @aculock @acsmem +@c check_sched_policy_attr ok +@c check_sched_priority_attr ok +@c sched_get_priority_min dup ok +@c sched_get_priority_max dup ok +@c check_stacksize_attr ok +@c lll_lock @asulock @aculock +@c free dup @ascuheap @acsmem +@c realloc dup @ascuheap @acsmem +@c memcpy dup ok +@c lll_unlock @asulock @aculock +Set the default attribute values to match the values in @var{attr}. 
The +function returns @math{0} on success and a non-zero error code on failure. +The following error codes are defined for this function: + +@table @code +@item EINVAL +At least one of the values in @var{attr} does not qualify as valid for the +attributes or the stack address is set in the attribute. +@item ENOMEM +The system does not have sufficient memory. +@end table +@end deftypefun + +@c FIXME these are undocumented: +@c pthread_atfork +@c pthread_attr_destroy +@c pthread_attr_getaffinity_np +@c pthread_attr_getdetachstate +@c pthread_attr_getguardsize +@c pthread_attr_getinheritsched +@c pthread_attr_getschedparam +@c pthread_attr_getschedpolicy +@c pthread_attr_getscope +@c pthread_attr_getstack +@c pthread_attr_getstackaddr +@c pthread_attr_getstacksize +@c pthread_attr_init +@c pthread_attr_setaffinity_np +@c pthread_attr_setdetachstate +@c pthread_attr_setguardsize +@c pthread_attr_setinheritsched +@c pthread_attr_setschedparam +@c pthread_attr_setschedpolicy +@c pthread_attr_setscope +@c pthread_attr_setstack +@c pthread_attr_setstackaddr +@c pthread_attr_setstacksize +@c pthread_barrierattr_destroy +@c pthread_barrierattr_getpshared +@c pthread_barrierattr_init +@c pthread_barrierattr_setpshared +@c pthread_barrier_destroy +@c pthread_barrier_init +@c pthread_barrier_wait +@c pthread_cancel +@c pthread_cleanup_push +@c pthread_cleanup_pop +@c pthread_condattr_destroy +@c pthread_condattr_getclock +@c pthread_condattr_getpshared +@c pthread_condattr_init +@c pthread_condattr_setclock +@c pthread_condattr_setpshared +@c pthread_cond_broadcast +@c pthread_cond_destroy +@c pthread_cond_init +@c pthread_cond_signal +@c pthread_cond_timedwait +@c pthread_cond_wait +@c pthread_create +@c pthread_detach +@c pthread_equal +@c pthread_exit +@c pthread_getaffinity_np +@c pthread_getattr_np +@c pthread_getconcurrency +@c pthread_getcpuclockid +@c pthread_getname_np +@c pthread_getschedparam +@c pthread_join +@c pthread_kill +@c pthread_kill_other_threads_np +@c 
pthread_mutexattr_destroy +@c pthread_mutexattr_getkind_np +@c pthread_mutexattr_getprioceiling +@c pthread_mutexattr_getprotocol +@c pthread_mutexattr_getpshared +@c pthread_mutexattr_getrobust +@c pthread_mutexattr_getrobust_np +@c pthread_mutexattr_gettype +@c pthread_mutexattr_init +@c pthread_mutexattr_setkind_np +@c pthread_mutexattr_setprioceiling +@c pthread_mutexattr_setprotocol +@c pthread_mutexattr_setpshared +@c pthread_mutexattr_setrobust +@c pthread_mutexattr_setrobust_np +@c pthread_mutexattr_settype +@c pthread_mutex_consistent +@c pthread_mutex_consistent_np +@c pthread_mutex_destroy +@c pthread_mutex_getprioceiling +@c pthread_mutex_init +@c pthread_mutex_lock +@c pthread_mutex_setprioceiling +@c pthread_mutex_timedlock +@c pthread_mutex_trylock +@c pthread_mutex_unlock +@c pthread_once +@c pthread_rwlockattr_destroy +@c pthread_rwlockattr_getkind_np +@c pthread_rwlockattr_getpshared +@c pthread_rwlockattr_init +@c pthread_rwlockattr_setkind_np +@c pthread_rwlockattr_setpshared +@c pthread_rwlock_destroy +@c pthread_rwlock_init +@c pthread_rwlock_rdlock +@c pthread_rwlock_timedrdlock +@c pthread_rwlock_timedwrlock +@c pthread_rwlock_tryrdlock +@c pthread_rwlock_trywrlock +@c pthread_rwlock_unlock +@c pthread_rwlock_wrlock +@c pthread_self +@c pthread_setaffinity_np +@c pthread_setcancelstate +@c pthread_setcanceltype +@c pthread_setconcurrency +@c pthread_setname_np +@c pthread_setschedparam +@c pthread_setschedprio +@c pthread_sigmask +@c pthread_sigqueue +@c pthread_spin_destroy +@c pthread_spin_init +@c pthread_spin_lock +@c pthread_spin_trylock +@c pthread_spin_unlock +@c pthread_testcancel +@c pthread_timedjoin_np +@c pthread_tryjoin_np +@c pthread_yield diff --git a/REORG.TODO/manual/time.texi b/REORG.TODO/manual/time.texi new file mode 100644 index 0000000000..dccb979955 --- /dev/null +++ b/REORG.TODO/manual/time.texi @@ -0,0 +1,2922 @@ +@node Date and Time, Resource Usage And Limitation, Arithmetic, Top +@c %MENU% Functions for getting the 
date and time and formatting them nicely +@chapter Date and Time + +This chapter describes functions for manipulating dates and times, +including functions for determining what time it is and conversion +between different time representations. + +@menu +* Time Basics:: Concepts and definitions. +* Elapsed Time:: Data types to represent elapsed times +* Processor And CPU Time:: Time a program has spent executing. +* Calendar Time:: Manipulation of ``real'' dates and times. +* Setting an Alarm:: Sending a signal after a specified time. +* Sleeping:: Waiting for a period of time. +@end menu + + +@node Time Basics +@section Time Basics +@cindex time + +Discussing time in a technical manual can be difficult because the word +``time'' in English refers to lots of different things. In this manual, +we use a rigorous terminology to avoid confusion, and the only thing we +use the simple word ``time'' for is to talk about the abstract concept. + +A @dfn{calendar time} is a point in the time continuum, for example +November 4, 1990, at 18:02.5 UTC. Sometimes this is called ``absolute +time''. +@cindex calendar time + +We don't speak of a ``date'', because that is inherent in a calendar +time. +@cindex date + +An @dfn{interval} is a contiguous part of the time continuum between two +calendar times, for example the hour between 9:00 and 10:00 on July 4, +1980. +@cindex interval + +An @dfn{elapsed time} is the length of an interval, for example, 35 +minutes. People sometimes sloppily use the word ``interval'' to refer +to the elapsed time of some interval. +@cindex elapsed time +@cindex time, elapsed + +An @dfn{amount of time} is a sum of elapsed times, which need not be of +any specific intervals. For example, the amount of time it takes to +read a book might be 9 hours, independently of when and in how many +sittings it is read. + +A @dfn{period} is the elapsed time of an interval between two events, +especially when they are part of a sequence of regularly repeating +events. 
+@cindex period of time + +@dfn{CPU time} is like calendar time, except that it is based on the +subset of the time continuum when a particular process is actively +using a CPU. CPU time is, therefore, relative to a process. +@cindex CPU time + +@dfn{Processor time} is an amount of time that a CPU is in use. In +fact, it's a basic system resource, since there's a limit to how much +can exist in any given interval (that limit is the elapsed time of the +interval times the number of CPUs in the processor). People often call +this CPU time, but we reserve the latter term in this manual for the +definition above. +@cindex processor time + +@node Elapsed Time +@section Elapsed Time +@cindex elapsed time + +One way to represent an elapsed time is with a simple arithmetic data +type, as with the following function to compute the elapsed time between +two calendar times. This function is declared in @file{time.h}. + +@comment time.h +@comment ISO +@deftypefun double difftime (time_t @var{time1}, time_t @var{time0}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{difftime} function returns the number of seconds of elapsed +time between calendar time @var{time1} and calendar time @var{time0}, as +a value of type @code{double}. The difference ignores leap seconds +unless leap second support is enabled. + +In @theglibc{}, you can simply subtract @code{time_t} values. But on +other systems, the @code{time_t} data type might use some other encoding +where subtraction doesn't work directly. +@end deftypefun + +@Theglibc{} provides two data types specifically for representing +an elapsed time. They are used by various @glibcadj{} functions, and +you can use them for your own purposes too. They're exactly the same +except that one has a resolution in microseconds, and the other, newer +one, is in nanoseconds. + +@comment sys/time.h +@comment BSD +@deftp {Data Type} {struct timeval} +@cindex timeval +The @code{struct timeval} structure represents an elapsed time. 
It is +declared in @file{sys/time.h} and has the following members: + +@table @code +@item time_t tv_sec +This represents the number of whole seconds of elapsed time. + +@item long int tv_usec +This is the rest of the elapsed time (a fraction of a second), +represented as the number of microseconds. It is always less than one +million. + +@end table +@end deftp + +@comment time.h +@comment POSIX.1 +@deftp {Data Type} {struct timespec} +@cindex timespec +The @code{struct timespec} structure represents an elapsed time. It is +declared in @file{time.h} and has the following members: + +@table @code +@item time_t tv_sec +This represents the number of whole seconds of elapsed time. + +@item long int tv_nsec +This is the rest of the elapsed time (a fraction of a second), +represented as the number of nanoseconds. It is always less than one +billion. + +@end table +@end deftp + +It is often necessary to subtract two values of type @w{@code{struct +timeval}} or @w{@code{struct timespec}}. Here is the best way to do +this. It works even on some peculiar operating systems where the +@code{tv_sec} member has an unsigned type. + +@smallexample +@include timeval_subtract.c.texi +@end smallexample + +Common functions that use @code{struct timeval} are @code{gettimeofday} +and @code{settimeofday}. + + +There are no @glibcadj{} functions specifically oriented toward +dealing with elapsed times, but the calendar time, processor time, and +alarm and sleeping functions have a lot to do with them. + + +@node Processor And CPU Time +@section Processor And CPU Time + +If you're trying to optimize your program or measure its efficiency, +it's very useful to know how much processor time it uses. For that, +calendar time and elapsed times are useless because a process may spend +time waiting for I/O or for other processes to use the CPU. However, +you can get the information with the functions in this section.
+ +CPU time (@pxref{Time Basics}) is represented by the data type +@code{clock_t}, which is a number of @dfn{clock ticks}. It gives the +total amount of time a process has actively used a CPU since some +arbitrary event. On @gnusystems{}, that event is the creation of the +process. While arbitrary in general, the event is always the same event +for any particular process, so you can always measure how much time on +the CPU a particular computation takes by examining the process' CPU +time before and after the computation. +@cindex CPU time +@cindex clock ticks +@cindex ticks, clock + +On @gnulinuxhurdsystems{}, @code{clock_t} is equivalent to @code{long int} and +@code{CLOCKS_PER_SEC} is an integer value. But in other systems, both +@code{clock_t} and the macro @code{CLOCKS_PER_SEC} can be either integer +or floating-point types. Casting CPU time values to @code{double}, as +in the example in @ref{CPU Time}, makes sure that operations such as +arithmetic and printing work properly and consistently no matter what +the underlying representation is. + +Note that the clock can wrap around. On a 32-bit system with +@code{CLOCKS_PER_SEC} set to one million, the @code{clock} function will +return the same value approximately every 72 minutes. + +For additional functions to examine a process' use of processor time, +and to control it, see @ref{Resource Usage And Limitation}. + + +@menu +* CPU Time::             The @code{clock} function. +* Processor Time::       The @code{times} function. +@end menu + +@node CPU Time +@subsection CPU Time Inquiry + +To get a process' CPU time, you can use the @code{clock} function. This +facility is declared in the header file @file{time.h}.
+@pindex time.h + +In typical usage, you call the @code{clock} function at the beginning +and end of the interval you want to time, subtract the values, and then +divide by @code{CLOCKS_PER_SEC} (the number of clock ticks per second) +to get processor time, like this: + +@smallexample +@group +#include <time.h> + +clock_t start, end; +double cpu_time_used; + +start = clock(); +@dots{} /* @r{Do the work.} */ +end = clock(); +cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; +@end group +@end smallexample + +Do not use a single CPU time as an amount of time; it doesn't work that +way. Either do a subtraction as shown above or query processor time +directly. @xref{Processor Time}. + +Different computers and operating systems vary wildly in how they keep +track of CPU time. It's common for the internal processor clock +to have a resolution somewhere between a hundredth and millionth of a +second. + +@comment time.h +@comment ISO +@deftypevr Macro int CLOCKS_PER_SEC +The value of this macro is the number of clock ticks per second measured +by the @code{clock} function. POSIX requires that this value be one +million independent of the actual resolution. +@end deftypevr + +@comment time.h +@comment ISO +@deftp {Data Type} clock_t +This is the type of the value returned by the @code{clock} function. +Values of type @code{clock_t} are numbers of clock ticks. +@end deftp + +@comment time.h +@comment ISO +@deftypefun clock_t clock (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On Hurd, this calls task_info twice and adds user and system time +@c from both basic and thread time info structs. On generic posix, +@c calls times and adds utime and stime. On bsd, calls getrusage and +@c safely converts stime and utime to clock. On linux, calls +@c clock_gettime. +This function returns the calling process' current CPU time. If the CPU +time is not available or cannot be represented, @code{clock} returns the +value @code{(clock_t)(-1)}. 
+@end deftypefun + + +@node Processor Time +@subsection Processor Time Inquiry + +The @code{times} function returns information about a process' +consumption of processor time in a @w{@code{struct tms}} object, in +addition to the process' CPU time. @xref{Time Basics}. You should +include the header file @file{sys/times.h} to use this facility. +@cindex processor time +@cindex CPU time +@pindex sys/times.h + +@comment sys/times.h +@comment POSIX.1 +@deftp {Data Type} {struct tms} +The @code{tms} structure is used to return information about process +times. It contains at least the following members: + +@table @code +@item clock_t tms_utime +This is the total processor time the calling process has used in +executing the instructions of its program. + +@item clock_t tms_stime +This is the processor time the system has used on behalf of the calling +process. + +@item clock_t tms_cutime +This is the sum of the @code{tms_utime} values and the @code{tms_cutime} +values of all terminated child processes of the calling process, whose +status has been reported to the parent process by @code{wait} or +@code{waitpid}; see @ref{Process Completion}. In other words, it +represents the total processor time used in executing the instructions +of all the terminated child processes of the calling process, excluding +child processes which have not yet been reported by @code{wait} or +@code{waitpid}. +@cindex child process + +@item clock_t tms_cstime +This is similar to @code{tms_cutime}, but represents the total processor +time the system has used on behalf of all the terminated child processes +of the calling process. +@end table + +All of the times are given in numbers of clock ticks. Unlike CPU time, +these are the actual amounts of time; not relative to any event. +@xref{Creating a Process}. +@end deftp + +@comment time.h +@comment POSIX.1 +@deftypevr Macro int CLK_TCK +This is an obsolete name for the number of clock ticks per second. Use +@code{sysconf (_SC_CLK_TCK)} instead. 
+@end deftypevr + +@comment sys/times.h +@comment POSIX.1 +@deftypefun clock_t times (struct tms *@var{buffer}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On HURD, this calls task_info twice, for basic and thread times info, +@c adding user and system times into tms, and then gettimeofday, to +@c compute the real time. On BSD, it calls getclktck, getrusage (twice) +@c and time. On Linux, it's a syscall with special handling to account +@c for clock_t counts that look like error values. +The @code{times} function stores the processor time information for +the calling process in @var{buffer}. + +The return value is the number of clock ticks since an arbitrary point +in the past, e.g. since system start-up. @code{times} returns +@code{(clock_t)(-1)} to indicate failure. +@end deftypefun + +@strong{Portability Note:} The @code{clock} function described in +@ref{CPU Time} is specified by the @w{ISO C} standard. The +@code{times} function is a feature of POSIX.1. On @gnusystems{}, the +CPU time is defined to be equivalent to the sum of the @code{tms_utime} +and @code{tms_stime} fields returned by @code{times}. + +@node Calendar Time +@section Calendar Time + +This section describes facilities for keeping track of calendar time. +@xref{Time Basics}. + +@Theglibc{} represents calendar time three ways: + +@itemize @bullet +@item +@dfn{Simple time} (the @code{time_t} data type) is a compact +representation, typically giving the number of seconds of elapsed time +since some implementation-specific base time. +@cindex simple time + +@item +There is also a ``high-resolution time'' representation. Like simple +time, this represents a calendar time as an elapsed time since a base +time, but instead of measuring in whole seconds, it uses a @code{struct +timeval} data type, which includes fractions of a second. Use this time +representation instead of simple time when you need greater precision.
+@cindex high-resolution time + +@item +@dfn{Local time} or @dfn{broken-down time} (the @code{struct tm} data +type) represents a calendar time as a set of components specifying the +year, month, and so on in the Gregorian calendar, for a specific time +zone. This calendar time representation is usually used only to +communicate with people. +@cindex local time +@cindex broken-down time +@cindex Gregorian calendar +@cindex calendar, Gregorian +@end itemize + +@menu +* Simple Calendar Time:: Facilities for manipulating calendar time. +* High-Resolution Calendar:: A time representation with greater precision. +* Broken-down Time:: Facilities for manipulating local time. +* High Accuracy Clock:: Maintaining a high accuracy system clock. +* Formatting Calendar Time:: Converting times to strings. +* Parsing Date and Time:: Convert textual time and date information back + into broken-down time values. +* TZ Variable:: How users specify the time zone. +* Time Zone Functions:: Functions to examine or specify the time zone. +* Time Functions Example:: An example program showing use of some of + the time functions. +@end menu + +@node Simple Calendar Time +@subsection Simple Calendar Time + +This section describes the @code{time_t} data type for representing calendar +time as simple time, and the functions which operate on simple time objects. +These facilities are declared in the header file @file{time.h}. +@pindex time.h + +@cindex epoch +@comment time.h +@comment ISO +@deftp {Data Type} time_t +This is the data type used to represent simple time. Sometimes, it also +represents an elapsed time. When interpreted as a calendar time value, +it represents the number of seconds elapsed since 00:00:00 on January 1, +1970, Coordinated Universal Time. (This calendar time is sometimes +referred to as the @dfn{epoch}.) 
POSIX requires that this count not +include leap seconds, but on some systems this count includes leap seconds +if you set @code{TZ} to certain values (@pxref{TZ Variable}). + +Note that a simple time has no concept of local time zone. Calendar +Time @var{T} is the same instant in time regardless of where on the +globe the computer is. + +In @theglibc{}, @code{time_t} is equivalent to @code{long int}. +In other systems, @code{time_t} might be either an integer or +floating-point type. +@end deftp + +The function @code{difftime} tells you the elapsed time between two +simple calendar times, which is not always as easy to compute as just +subtracting. @xref{Elapsed Time}. + +@comment time.h +@comment ISO +@deftypefun time_t time (time_t *@var{result}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{time} function returns the current calendar time as a value of +type @code{time_t}. If the argument @var{result} is not a null pointer, +the calendar time value is also stored in @code{*@var{result}}. If the +current calendar time is not available, the value +@w{@code{(time_t)(-1)}} is returned. +@end deftypefun + +@c The GNU C library implements stime() with a call to settimeofday() on +@c Linux. +@comment time.h +@comment SVID, XPG +@deftypefun int stime (const time_t *@var{newtime}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On unix, this is implemented in terms of settimeofday. +@code{stime} sets the system clock, i.e., it tells the system that the +current calendar time is @var{newtime}, where @code{newtime} is +interpreted as described in the above definition of @code{time_t}. + +@code{settimeofday} is a newer function which sets the system clock to +better than one second precision. @code{settimeofday} is generally a +better choice than @code{stime}. @xref{High-Resolution Calendar}. + +Only the superuser can set the system clock. + +If the function succeeds, the return value is zero. 
Otherwise, it is +@code{-1} and @code{errno} is set accordingly: + +@table @code +@item EPERM +The process is not superuser. +@end table +@end deftypefun + + + +@node High-Resolution Calendar +@subsection High-Resolution Calendar + +The @code{time_t} data type used to represent simple times has a +resolution of only one second. Some applications need more precision. + +So, @theglibc{} also contains functions which are capable of +representing calendar times to a higher resolution than one second. The +functions and the associated data types described in this section are +declared in @file{sys/time.h}. +@pindex sys/time.h + +@comment sys/time.h +@comment BSD +@deftp {Data Type} {struct timezone} +The @code{struct timezone} structure is used to hold minimal information +about the local time zone. It has the following members: + +@table @code +@item int tz_minuteswest +This is the number of minutes west of UTC. + +@item int tz_dsttime +If nonzero, Daylight Saving Time applies during some part of the year. +@end table + +The @code{struct timezone} type is obsolete and should never be used. +Instead, use the facilities described in @ref{Time Zone Functions}. +@end deftp + +@comment sys/time.h +@comment BSD +@deftypefun int gettimeofday (struct timeval *@var{tp}, struct timezone *@var{tzp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On most GNU/Linux systems this is a direct syscall, but the posix/ +@c implementation (not used on GNU/Linux or GNU/Hurd) relies on time and +@c localtime_r, saving and restoring tzname in an unsafe manner. +@c On some GNU/Linux variants, ifunc resolvers are used in shared libc +@c for vdso resolution. ifunc-vdso-revisit. +The @code{gettimeofday} function returns the current calendar time as +the elapsed time since the epoch in the @code{struct timeval} structure +indicated by @var{tp}. (@xref{Elapsed Time}, for a description of +@code{struct timeval}.)
Information about the time zone is returned in +the structure pointed to by @var{tzp}. If the @var{tzp} argument is a null +pointer, time zone information is ignored. + +The return value is @code{0} on success and @code{-1} on failure. The +following @code{errno} error condition is defined for this function: + +@table @code +@item ENOSYS +The operating system does not support getting time zone information, and +@var{tzp} is not a null pointer. @gnusystems{} do not +support using @w{@code{struct timezone}} to represent time zone +information; that is an obsolete feature of 4.3 BSD. +Instead, use the facilities described in @ref{Time Zone Functions}. +@end table +@end deftypefun + +@comment sys/time.h +@comment BSD +@deftypefun int settimeofday (const struct timeval *@var{tp}, const struct timezone *@var{tzp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On HURD, it calls host_set_time with a privileged port. On other +@c unix systems, it's a syscall. +The @code{settimeofday} function sets the current calendar time in the +system clock according to the arguments. As for @code{gettimeofday}, +the calendar time is represented as the elapsed time since the epoch. +As for @code{gettimeofday}, time zone information is ignored if +@var{tzp} is a null pointer. + +You must be a privileged user in order to use @code{settimeofday}. + +Some kernels automatically set the system clock from some source such as +a hardware clock when they start up. Others, including Linux, place the +system clock in an ``invalid'' state (in which attempts to read the clock +fail). A call of @code{stime} removes the system clock from an invalid +state, and system startup scripts typically run a program that calls +@code{stime}. + +@code{settimeofday} causes a sudden jump forwards or backwards, which +can cause a variety of problems in a system. Use @code{adjtime} (below) +to make a smooth transition from one time to another by temporarily +speeding up or slowing down the clock. 
+ +With a Linux kernel, @code{adjtimex} does the same thing and can also +make permanent changes to the speed of the system clock so it doesn't +need to be corrected as often. + +The return value is @code{0} on success and @code{-1} on failure. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EPERM +This process cannot set the clock because it is not privileged. + +@item ENOSYS +The operating system does not support setting time zone information, and +@var{tzp} is not a null pointer. +@end table +@end deftypefun + +@c On Linux, GNU libc implements adjtime() as a call to adjtimex(). +@comment sys/time.h +@comment BSD +@deftypefun int adjtime (const struct timeval *@var{delta}, struct timeval *@var{olddelta}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On hurd and mach, call host_adjust_time with a privileged port. On +@c Linux, it's implemented in terms of adjtimex. On other unixen, it's +@c a syscall. +This function speeds up or slows down the system clock in order to make +a gradual adjustment. This ensures that the calendar time reported by +the system clock is always monotonically increasing, which might not +happen if you simply set the clock. + +The @var{delta} argument specifies a relative adjustment to be made to +the clock time. If negative, the system clock is slowed down for a +while until it has lost this much elapsed time. If positive, the system +clock is speeded up for a while. + +If the @var{olddelta} argument is not a null pointer, the @code{adjtime} +function returns information about any previous time adjustment that +has not yet completed. + +This function is typically used to synchronize the clocks of computers +in a local network. You must be a privileged user to use it. + +With a Linux kernel, you can use the @code{adjtimex} function to +permanently change the clock speed. + +The return value is @code{0} on success and @code{-1} on failure. 
The +following @code{errno} error condition is defined for this function: + +@table @code +@item EPERM +You do not have privilege to set the time. +@end table +@end deftypefun + +@strong{Portability Note:} The @code{gettimeofday}, @code{settimeofday}, +and @code{adjtime} functions are derived from BSD. + + +Symbols for the following function are declared in @file{sys/timex.h}. + +@comment sys/timex.h +@comment GNU +@deftypefun int adjtimex (struct timex *@var{timex}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c It's a syscall, only available on linux. + +@code{adjtimex} is functionally identical to @code{ntp_adjtime}. +@xref{High Accuracy Clock}. + +This function is present only with a Linux kernel. + +@end deftypefun + +@node Broken-down Time +@subsection Broken-down Time +@cindex broken-down time +@cindex calendar time and broken-down time + +Calendar time is represented by the usual @glibcadj{} functions as an +elapsed time since a fixed base calendar time. This is convenient for +computation, but has no relation to the way people normally think of +calendar time. By contrast, @dfn{broken-down time} is a binary +representation of calendar time separated into year, month, day, and so +on. Broken-down time values are not useful for calculations, but they +are useful for printing human readable time information. + +A broken-down time value is always relative to a choice of time +zone, and it also indicates which time zone that is. + +The symbols in this section are declared in the header file @file{time.h}. + +@comment time.h +@comment ISO +@deftp {Data Type} {struct tm} +This is the data type used to represent a broken-down time. The structure +contains at least the following members, which can appear in any order. 
+ +@table @code +@item int tm_sec +This is the number of full seconds since the top of the minute (normally +in the range @code{0} through @code{59}, but the actual upper limit is +@code{60}, to allow for leap seconds if leap second support is +available). +@cindex leap second + +@item int tm_min +This is the number of full minutes since the top of the hour (in the +range @code{0} through @code{59}). + +@item int tm_hour +This is the number of full hours past midnight (in the range @code{0} through +@code{23}). + +@item int tm_mday +This is the ordinal day of the month (in the range @code{1} through @code{31}). +Watch out for this one! As the only ordinal number in the structure, it is +inconsistent with the rest of the structure. + +@item int tm_mon +This is the number of full calendar months since the beginning of the +year (in the range @code{0} through @code{11}). Watch out for this one! +People usually use ordinal numbers for month-of-year (where January = 1). + +@item int tm_year +This is the number of full calendar years since 1900. + +@item int tm_wday +This is the number of full days since Sunday (in the range @code{0} through +@code{6}). + +@item int tm_yday +This is the number of full days since the beginning of the year (in the +range @code{0} through @code{365}). + +@item int tm_isdst +@cindex Daylight Saving Time +@cindex summer time +This is a flag that indicates whether Daylight Saving Time is (or was, or +will be) in effect at the time described. The value is positive if +Daylight Saving Time is in effect, zero if it is not, and negative if the +information is not available. + +@item long int tm_gmtoff +This field describes the time zone that was used to compute this +broken-down time value, including any adjustment for daylight saving; it +is the number of seconds that you must add to UTC to get local time. +You can also think of this as the number of seconds east of UTC. For +example, for U.S. Eastern Standard Time, the value is @code{-5*60*60}. 
+The @code{tm_gmtoff} field is derived from BSD and is a GNU library +extension; it is not visible in a strict @w{ISO C} environment. + +@item const char *tm_zone +This field is the name for the time zone that was used to compute this +broken-down time value. Like @code{tm_gmtoff}, this field is a BSD and +GNU extension, and is not visible in a strict @w{ISO C} environment. +@end table +@end deftp + + +@comment time.h +@comment ISO +@deftypefun {struct tm *} localtime (const time_t *@var{time}) +@safety{@prelim{}@mtunsafe{@mtasurace{:tmbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c Calls tz_convert with a static buffer. +@c localtime @mtasurace:tmbuf @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c tz_convert dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +The @code{localtime} function converts the simple time pointed to by +@var{time} to broken-down time representation, expressed relative to the +user's specified time zone. + +The return value is a pointer to a static broken-down time structure, which +might be overwritten by subsequent calls to @code{ctime}, @code{gmtime}, +or @code{localtime}. (But no other library function overwrites the contents +of this object.) + +The return value is the null pointer if @var{time} cannot be represented +as a broken-down time; typically this is because the year cannot fit into +an @code{int}. + +Calling @code{localtime} also sets the current time zone as if +@code{tzset} were called. @xref{Time Zone Functions}. +@end deftypefun + +Using the @code{localtime} function is a big problem in multi-threaded +programs. The result is returned in a static buffer and this is used in +all threads. POSIX.1c introduced a variant of this function. 
+ +@comment time.h +@comment POSIX.1c +@deftypefun {struct tm *} localtime_r (const time_t *@var{time}, struct tm *@var{resultp}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c localtime_r @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c tz_convert(use_localtime) @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c tzset_internal @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c always called with tzset_lock held +@c sets static is_initialized before initialization; +@c reads and sets old_tz; sets tz_rules. +@c some of the issues only apply on the first call. +@c subsequent calls only trigger these when called by localtime; +@c otherwise, they're ok. +@c getenv dup @mtsenv +@c strcmp dup ok +@c strdup @ascuheap +@c tzfile_read @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c memcmp dup ok +@c strstr dup ok +@c getenv dup @mtsenv +@c asprintf dup @mtslocale @ascuheap @acsmem +@c stat64 dup ok +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fileno dup ok +@c fstat64 dup ok +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +@c free dup @ascuheap @acsmem +@c fsetlocking dup ok [no @mtasurace:stream @asulock, exclusive] +@c fread_unlocked dup ok [no @mtasurace:stream @asucorrupt @acucorrupt] +@c memcpy dup ok +@c decode ok +@c bswap_32 dup ok +@c fseek dup ok [no @mtasurace:stream @asucorrupt @acucorrupt] +@c ftello dup ok [no @mtasurace:stream @asucorrupt @acucorrupt] +@c malloc dup @ascuheap @acsmem +@c decode64 ok +@c bswap_64 dup ok +@c getc_unlocked ok [no @mtasurace:stream @asucorrupt @acucorrupt] +@c tzstring dup @ascuheap @acsmem +@c compute_tzname_max dup ok [guarded by tzset_lock] +@c memset dup ok +@c update_vars ok [guarded by tzset_lock] +@c sets daylight, timezone, tzname and tzname_cur_max; +@c called only with tzset_lock held, unless tzset_parse_tz +@c 
(internal, but not static) gets called by users; given its +@c double-underscore-prefixed name, this interface violation could +@c be regarded as undefined behavior. +@c strlen ok +@c tzset_parse_tz @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c sscanf dup @mtslocale @ascuheap @acsmem +@c isalnum dup @mtsenv +@c tzstring @ascuheap @acsmem +@c reads and changes tzstring_list without synchronization, but +@c only called with tzset_lock held (save for interface violations) +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c strcpy dup ok +@c isdigit dup @mtslocale +@c compute_offset ok +@c tzfile_default @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c sets tzname, timezone, types, zone_names, rule_*off, etc; no guards +@c strlen dup ok +@c tzfile_read dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c mempcpy dup ok +@c compute_tzname_max ok [if guarded by tzset_lock] +@c iterates over zone_names; no guards +@c free dup @ascuheap @acsmem +@c strtoul dup @mtslocale +@c update_vars dup ok +@c tzfile_compute(use_localtime) @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c sets tzname; no guards. with !use_localtime, as in gmtime, it's ok +@c tzstring dup @ascuheap @acsmem +@c tzset_parse_tz dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c offtime dup ok +@c tz_compute dup ok +@c strcmp dup ok +@c offtime ok +@c isleap dup ok +@c tz_compute ok +@c compute_change ok +@c isleap ok +@c libc_lock_unlock dup @aculock + +The @code{localtime_r} function works just like the @code{localtime} +function. It takes a pointer to a variable containing a simple time +and converts it to the broken-down time format. + +But the result is not placed in a static buffer. Instead it is placed +in the object of type @code{struct tm} to which the parameter +@var{resultp} points. 
+ +If the conversion is successful the function returns a pointer to the +object the result was written into, i.e., it returns @var{resultp}. +@end deftypefun + + +@comment time.h +@comment ISO +@deftypefun {struct tm *} gmtime (const time_t *@var{time}) +@safety{@prelim{}@mtunsafe{@mtasurace{:tmbuf} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c gmtime @mtasurace:tmbuf @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c tz_convert dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +This function is similar to @code{localtime}, except that the broken-down +time is expressed as Coordinated Universal Time (UTC) (formerly called +Greenwich Mean Time (GMT)) rather than relative to a local time zone. + +@end deftypefun + +As with the @code{localtime} function, the problem is that the result +is placed in a static variable. POSIX.1c also provides a replacement for +@code{gmtime}. + +@comment time.h +@comment POSIX.1c +@deftypefun {struct tm *} gmtime_r (const time_t *@var{time}, struct tm *@var{resultp}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c You'd think tz_convert could avoid some safety issues with +@c !use_localtime, but no such luck: tzset_internal will always bring +@c about all possible AS and AC problems when it's first called. +@c Calling any of localtime,gmtime_r once would run the initialization +@c and avoid the heap, mem and fd issues in gmtime* in subsequent calls, +@c but the unsafe locking would remain. +@c gmtime_r @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c tz_convert(gmtime_r) dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +This function is similar to @code{localtime_r}, except that, like +@code{gmtime}, it expresses the given time as Coordinated Universal Time. 
+ +If the conversion is successful the function returns a pointer to the +object the result was written into, i.e., it returns @var{resultp}. +@end deftypefun + + +@comment time.h +@comment ISO +@deftypefun time_t mktime (struct tm *@var{brokentime}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c mktime @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c passes a static localtime_offset to mktime_internal; it is read +@c once, used as an initial guess, and updated at the end, but not +@c used except as a guess for subsequent calls, so it should be safe. +@c Even though a compiler might delay the load and perform it multiple +@c times (bug 16346), there are at least two unconditional uses of the +@c auto variable in which the first load is stored, separated by a +@c call to an external function, and a conditional change of the +@c variable before the external call, so refraining from allocating a +@c local variable at the first load would be a very bad optimization. +@c tzset dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c mktime_internal(localtime_r) @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c ydhms_diff ok +@c ranged_convert(localtime_r) @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c *convert = localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c time_t_avg dup ok +@c guess_time_tm dup ok +@c ydhms_diff dup ok +@c time_t_add_ok ok +@c time_t_avg ok +@c isdst_differ ok +@c time_t_int_add_ok ok +The @code{mktime} function converts a broken-down time structure to a +simple time representation. It also normalizes the contents of the +broken-down time structure, and fills in some components based on the +values of the others. 
+ +The @code{mktime} function ignores the specified contents of the +@code{tm_wday}, @code{tm_yday}, @code{tm_gmtoff}, and @code{tm_zone} +members of the broken-down time +structure. It uses the values of the other components to determine the +calendar time; it's permissible for these components to have +unnormalized values outside their normal ranges. The last thing that +@code{mktime} does is adjust the components of the @var{brokentime} +structure, including the members that were initially ignored. + +If the specified broken-down time cannot be represented as a simple time, +@code{mktime} returns a value of @code{(time_t)(-1)} and does not modify +the contents of @var{brokentime}. + +Calling @code{mktime} also sets the current time zone as if +@code{tzset} were called; @code{mktime} uses this information instead +of @var{brokentime}'s initial @code{tm_gmtoff} and @code{tm_zone} +members. @xref{Time Zone Functions}. +@end deftypefun + +@comment time.h +@comment ??? +@deftypefun time_t timelocal (struct tm *@var{brokentime}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c Alias to mktime. + +@code{timelocal} is functionally identical to @code{mktime}, but more +mnemonically named. Note that it is the inverse of the @code{localtime} +function. + +@strong{Portability note:} @code{mktime} is essentially universally +available. @code{timelocal} is rather rare. + +@end deftypefun + +@comment time.h +@comment ??? +@deftypefun time_t timegm (struct tm *@var{brokentime}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c timegm @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c gmtime_offset triggers the same caveats as localtime_offset in mktime. 
+@c although gmtime_r, as called by mktime, might save some issues, +@c tzset calls tzset_internal with always, which forces +@c reinitialization, so all issues may arise. +@c tzset dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c mktime_internal(gmtime_r) @asulock @aculock +@c ..gmtime_r @asulock @aculock +@c ... dup ok +@c tz_convert(!use_localtime) @asulock @aculock +@c ... dup @asulock @aculock +@c tzfile_compute(!use_localtime) ok + +@code{timegm} is functionally identical to @code{mktime} except it +always takes the input values to be Coordinated Universal Time (UTC) +regardless of any local time zone setting. + +Note that @code{timegm} is the inverse of @code{gmtime}. + +@strong{Portability note:} @code{mktime} is essentially universally +available. @code{timegm} is rather rare. For the most portable +conversion from a UTC broken-down time to a simple time, set +the @code{TZ} environment variable to UTC, call @code{mktime}, then set +@code{TZ} back. + +@end deftypefun + + + +@node High Accuracy Clock +@subsection High Accuracy Clock + +@cindex time, high precision +@cindex clock, high accuracy +@pindex sys/timex.h +@c On Linux, GNU libc implements ntp_gettime() and ntp_adjtime() as calls +@c to adjtimex(). +The @code{ntp_gettime} and @code{ntp_adjtime} functions provide an +interface to monitor and manipulate the system clock to maintain high +accuracy time. For example, you can fine-tune the speed of the clock +or synchronize it with another time source. + +A typical use of these functions is by a server implementing the Network +Time Protocol to synchronize the clocks of multiple systems and high +precision clocks. + +These functions are declared in @file{sys/timex.h}. + +@tindex struct ntptimeval +@deftp {Data Type} {struct ntptimeval} +This structure is used for information about the system clock. 
It +contains the following members: +@table @code +@item struct timeval time +This is the current calendar time, expressed as the elapsed time since +the epoch. The @code{struct timeval} data type is described in +@ref{Elapsed Time}. + +@item long int maxerror +This is the maximum error, measured in microseconds. Unless updated +via @code{ntp_adjtime} periodically, this value will reach some +platform-specific maximum value. + +@item long int esterror +This is the estimated error, measured in microseconds. This value can +be set by @code{ntp_adjtime} to indicate the estimated offset of the +system clock from the true calendar time. +@end table +@end deftp + +@comment sys/timex.h +@comment GNU +@deftypefun int ntp_gettime (struct ntptimeval *@var{tptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Wrapper for adjtimex. +The @code{ntp_gettime} function sets the structure pointed to by +@var{tptr} to current values. The elements of the structure afterwards +contain the values the timer implementation in the kernel assumes. They +might or might not be correct. If they are not, an @code{ntp_adjtime} +call is necessary. + +The return value is @code{0} on success and other values on failure. The +following @code{errno} error conditions are defined for this function: + +@vtable @code +@item TIME_ERROR +The precision clock model is not properly set up at the moment, thus the +clock must be considered unsynchronized, and the values should be +treated with care. +@end vtable +@end deftypefun + +@tindex struct timex +@deftp {Data Type} {struct timex} +This structure is used to control and monitor the system clock. It +contains the following members: +@table @code +@item unsigned int modes +This variable controls whether and which values are set. Several +symbolic constants have to be combined with @emph{binary or} to specify +the effective mode. These constants start with @code{MOD_}. 
+ +@item long int offset +This value indicates the current offset of the system clock from the true +calendar time. The value is given in microseconds. If bit +@code{MOD_OFFSET} is set in @code{modes}, the offset (and possibly other +dependent values) can be set. The offset's absolute value must not +exceed @code{MAXPHASE}. + + +@item long int frequency +This value indicates the difference in frequency between the true +calendar time and the system clock. The value is expressed as scaled +PPM (parts per million, 0.0001%). The scaling is @code{1 << +SHIFT_USEC}. The value can be set with bit @code{MOD_FREQUENCY}, but +the absolute value must not exceed @code{MAXFREQ}. + +@item long int maxerror +This is the maximum error, measured in microseconds. A new value can be +set using bit @code{MOD_MAXERROR}. Unless updated via +@code{ntp_adjtime} periodically, this value will increase steadily +and reach some platform-specific maximum value. + +@item long int esterror +This is the estimated error, measured in microseconds. This value can +be set using bit @code{MOD_ESTERROR}. + +@item int status +This variable reflects the various states of the clock machinery. There +are symbolic constants for the significant bits, starting with +@code{STA_}. Some of these flags can be updated using the +@code{MOD_STATUS} bit. + +@item long int constant +This value represents the bandwidth or stiffness of the PLL (phase +locked loop) implemented in the kernel. The value can be changed using +bit @code{MOD_TIMECONST}. + +@item long int precision +This value represents the accuracy or the maximum error when reading the +system clock. The value is expressed in microseconds. + +@item long int tolerance +This value represents the maximum frequency error of the system clock in +scaled PPM. This value is used to increase the @code{maxerror} every +second. + +@item struct timeval time +The current calendar time. + +@item long int tick +The elapsed time between clock ticks in microseconds. 
A clock tick is a +periodic timer interrupt on which the system clock is based. + +@item long int ppsfreq +This is the first of a few optional variables that are present only if +the system clock can use a PPS (pulse per second) signal to discipline +the system clock. The value is expressed in scaled PPM and it denotes +the difference in frequency between the system clock and the PPS signal. + +@item long int jitter +This value expresses a median filtered average of the PPS signal's +dispersion in microseconds. + +@item int shift +This value is a binary exponent for the duration of the PPS calibration +interval, ranging from @code{PPS_SHIFT} to @code{PPS_SHIFTMAX}. + +@item long int stabil +This value represents the median filtered dispersion of the PPS +frequency in scaled PPM. + +@item long int jitcnt +This counter represents the number of pulses where the jitter exceeded +the allowed maximum @code{MAXTIME}. + +@item long int calcnt +This counter reflects the number of successful calibration intervals. + +@item long int errcnt +This counter represents the number of calibration errors (caused by +large offsets or jitter). + +@item long int stbcnt +This counter denotes the number of calibrations where the stability +exceeded the threshold. +@end table +@end deftp + +@comment sys/timex.h +@comment GNU +@deftypefun int ntp_adjtime (struct timex *@var{tptr}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Alias to adjtimex syscall. +The @code{ntp_adjtime} function sets the structure specified by +@var{tptr} to current values. + +In addition, @code{ntp_adjtime} updates some settings to match what you +pass to it in *@var{tptr}. Use the @code{modes} element of *@var{tptr} +to select what settings to update. You can set @code{offset}, +@code{freq}, @code{maxerror}, @code{esterror}, @code{status}, +@code{constant}, and @code{tick}. + +@code{modes} = zero means set nothing. + +Only the superuser can update settings. 
+ +@c On Linux, ntp_adjtime() also does the adjtime() function if you set +@c modes = ADJ_OFFSET_SINGLESHOT (in fact, that is how GNU libc implements +@c adjtime()). But this should be considered an internal function because +@c it's so inconsistent with the rest of what ntp_adjtime() does and is +@c forced in an ugly way into the struct timex. So we don't document it +@c and instead document adjtime() as the way to achieve the function. + +The return value is @code{0} on success and other values on failure. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item TIME_ERROR +The high accuracy clock model is not properly set up at the moment, thus the +clock must be considered unsynchronized, and the values should be +treated with care. Another reason could be that the specified new values +are not allowed. + +@item EPERM +The process specified a settings update, but is not superuser. + +@end table + +For more details see RFC1305 (Network Time Protocol, Version 3) and +related documents. + +@strong{Portability note:} Early versions of @theglibc{} did not +have this function but did have the synonymous @code{adjtimex}. + +@end deftypefun + + +@node Formatting Calendar Time +@subsection Formatting Calendar Time + +The functions described in this section format calendar time values as +strings. These functions are declared in the header file @file{time.h}. +@pindex time.h + +@comment time.h +@comment ISO +@deftypefun {char *} asctime (const struct tm *@var{brokentime}) +@safety{@prelim{}@mtunsafe{@mtasurace{:asctime} @mtslocale{}}@asunsafe{}@acsafe{}} +@c asctime @mtasurace:asctime @mtslocale +@c Uses a static buffer. 
+@c asctime_internal @mtslocale +@c snprintf dup @mtslocale [no @ascuheap @acsmem] +@c ab_day_name @mtslocale +@c ab_month_name @mtslocale +The @code{asctime} function converts the broken-down time value that +@var{brokentime} points to into a string in a standard format: + +@smallexample +"Tue May 21 13:46:22 1991\n" +@end smallexample + +The abbreviations for the days of the week are: @samp{Sun}, @samp{Mon}, +@samp{Tue}, @samp{Wed}, @samp{Thu}, @samp{Fri}, and @samp{Sat}. + +The abbreviations for the months are: @samp{Jan}, @samp{Feb}, +@samp{Mar}, @samp{Apr}, @samp{May}, @samp{Jun}, @samp{Jul}, @samp{Aug}, +@samp{Sep}, @samp{Oct}, @samp{Nov}, and @samp{Dec}. + +The return value points to a statically allocated string, which might be +overwritten by subsequent calls to @code{asctime} or @code{ctime}. +(But no other library function overwrites the contents of this +string.) +@end deftypefun + +@comment time.h +@comment POSIX.1c +@deftypefun {char *} asctime_r (const struct tm *@var{brokentime}, char *@var{buffer}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@assafe{}@acsafe{}} +@c asctime_r @mtslocale +@c asctime_internal dup @mtslocale +This function is similar to @code{asctime} but instead of placing the +result in a static buffer it writes the string in the buffer pointed to +by the parameter @var{buffer}. This buffer should have room +for at least 26 bytes, including the terminating null. + +If no error occurred the function returns a pointer to the string the +result was written into, i.e., it returns @var{buffer}. Otherwise +it returns @code{NULL}. 
+@end deftypefun + + +@comment time.h +@comment ISO +@deftypefun {char *} ctime (const time_t *@var{time}) +@safety{@prelim{}@mtunsafe{@mtasurace{:tmbuf} @mtasurace{:asctime} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c ctime @mtasurace:tmbuf @mtasurace:asctime @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c localtime dup @mtasurace:tmbuf @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c asctime dup @mtasurace:asctime @mtslocale +The @code{ctime} function is similar to @code{asctime}, except that you +specify the calendar time argument as a @code{time_t} simple time value +rather than in broken-down local time format. It is equivalent to + +@smallexample +asctime (localtime (@var{time})) +@end smallexample + +Calling @code{ctime} also sets the current time zone as if +@code{tzset} were called. @xref{Time Zone Functions}. +@end deftypefun + +@comment time.h +@comment POSIX.1c +@deftypefun {char *} ctime_r (const time_t *@var{time}, char *@var{buffer}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c ctime_r @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c asctime_r dup @mtslocale +This function is similar to @code{ctime}, but places the result in the +string pointed to by @var{buffer}. It is equivalent to (written using +gcc extensions, @pxref{Statement Exprs,,,gcc,Porting and Using gcc}): + +@smallexample +(@{ struct tm tm; asctime_r (localtime_r (@var{time}, &tm), @var{buffer}); @}) +@end smallexample + +If no error occurred the function returns a pointer to the string the +result was written into, i.e., it returns @var{buffer}. Otherwise +it returns @code{NULL}. 
+@end deftypefun + + +@comment time.h +@comment ISO +@deftypefun size_t strftime (char *@var{s}, size_t @var{size}, const char *@var{template}, const struct tm *@var{brokentime}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c strftime @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c strftime_l @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c strftime_internal @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c add ok +@c memset_zero dup ok +@c memset_space dup ok +@c strlen dup ok +@c mbrlen @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd [no @mtasurace:mbstate/!ps] +@c mbsinit dup ok +@c cpy ok +@c add dup ok +@c memcpy_lowcase ok +@c TOLOWER ok +@c tolower_l ok +@c memcpy_uppcase ok +@c TOUPPER ok +@c toupper_l ok +@c MEMCPY ok +@c memcpy dup ok +@c ISDIGIT ok +@c STRLEN ok +@c strlen dup ok +@c strftime_internal dup @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c TOUPPER dup ok +@c nl_get_era_entry @ascuheap @asulock @acsmem @aculock +@c nl_init_era_entries @ascuheap @asulock @acsmem @aculock +@c libc_rwlock_wrlock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c memset dup ok +@c free dup @ascuheap @acsmem +@c realloc dup @ascuheap @acsmem +@c memcpy dup ok +@c strchr dup ok +@c wcschr dup ok +@c libc_rwlock_unlock dup @asulock @aculock +@c ERA_DATE_CMP ok +@c DO_NUMBER ok +@c DO_NUMBER_SPACEPAD ok +@c nl_get_alt_digit @ascuheap @asulock @acsmem @aculock +@c libc_rwlock_wrlock dup @asulock @aculock +@c nl_init_alt_digit @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c memset dup ok +@c strchr dup ok +@c libc_rwlock_unlock dup @aculock +@c memset_space ok +@c memset dup ok +@c memset_zero 
ok +@c memset dup ok +@c mktime dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c iso_week_days ok +@c isleap ok +@c tzset dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c gmtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c tm_diff ok +This function is similar to the @code{sprintf} function (@pxref{Formatted +Output}), but the conversion specifications that can appear in the format +template @var{template} are specialized for printing components of the date +and time @var{brokentime} according to the locale currently specified for +time conversion (@pxref{Locales}) and the current time zone +(@pxref{Time Zone Functions}). + +Ordinary characters appearing in the @var{template} are copied to the +output string @var{s}; this can include multibyte character sequences. +Conversion specifiers are introduced by a @samp{%} character, followed +by an optional flag which can be one of the following. These flags +are all GNU extensions. The first three affect only the output of +numbers: + +@table @code +@item _ +The number is padded with spaces. + +@item - +The number is not padded at all. + +@item 0 +The number is padded with zeros even if the format specifies padding +with spaces. + +@item ^ +The output uses uppercase characters, but only if this is possible +(@pxref{Case Conversion}). +@end table + +The default action is to pad the number with zeros to keep it a constant +width. Numbers that do not have a range indicated below are never +padded, since there is no natural width for them. + +Following the flag an optional specification of the width is possible. +This is specified in decimal notation. If the natural size of the +output of the field has fewer than the specified number of characters, +the result is written right-adjusted and space-padded to the given +size. 
+ +An optional modifier can follow the optional flag and width +specification. The modifiers, which were first standardized by +POSIX.2-1992 and by @w{ISO C99}, are: + +@table @code +@item E +Use the locale's alternate representation for date and time. This +modifier applies to the @code{%c}, @code{%C}, @code{%x}, @code{%X}, +@code{%y} and @code{%Y} format specifiers. In a Japanese locale, for +example, @code{%Ex} might yield a date format based on the Japanese +Emperors' reigns. + +@item O +Use the locale's alternate numeric symbols for numbers. This modifier +applies only to numeric format specifiers. +@end table + +If the format supports the modifier but no alternate representation +is available, it is ignored. + +The conversion specifier ends with a format specifier taken from the +following list. The whole @samp{%} sequence is replaced in the output +string as follows: + +@table @code +@item %a +The abbreviated weekday name according to the current locale. + +@item %A +The full weekday name according to the current locale. + +@item %b +The abbreviated month name according to the current locale. + +@item %B +The full month name according to the current locale. + +Using @code{%B} together with @code{%d} produces grammatically +incorrect results for some locales. + +@item %c +The preferred calendar time representation for the current locale. + +@item %C +The century of the year. This is equivalent to the greatest integer not +greater than the year divided by 100. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %d +The day of the month as a decimal number (range @code{01} through @code{31}). + +@item %D +The date using the format @code{%m/%d/%y}. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %e +The day of the month like with @code{%d}, but padded with spaces (range +@code{ 1} through @code{31}). + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. 
+ +@item %F +The date using the format @code{%Y-%m-%d}. This is the form specified +in the @w{ISO 8601} standard and is the preferred form for all uses. + +This format was first standardized by @w{ISO C99} and by POSIX.1-2001. + +@item %g +The year corresponding to the ISO week number, but without the century +(range @code{00} through @code{99}). This has the same format and value +as @code{%y}, except that if the ISO week number (see @code{%V}) belongs +to the previous or next year, that year is used instead. + +This format was first standardized by @w{ISO C99} and by POSIX.1-2001. + +@item %G +The year corresponding to the ISO week number. This has the same format +and value as @code{%Y}, except that if the ISO week number (see +@code{%V}) belongs to the previous or next year, that year is used +instead. + +This format was first standardized by @w{ISO C99} and by POSIX.1-2001 +but was previously available as a GNU extension. + +@item %h +The abbreviated month name according to the current locale. The action +is the same as for @code{%b}. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %H +The hour as a decimal number, using a 24-hour clock (range @code{00} through +@code{23}). + +@item %I +The hour as a decimal number, using a 12-hour clock (range @code{01} through +@code{12}). + +@item %j +The day of the year as a decimal number (range @code{001} through @code{366}). + +@item %k +The hour as a decimal number, using a 24-hour clock like @code{%H}, but +padded with spaces (range @code{ 0} through @code{23}). + +This format is a GNU extension. + +@item %l +The hour as a decimal number, using a 12-hour clock like @code{%I}, but +padded with spaces (range @code{ 1} through @code{12}). + +This format is a GNU extension. + +@item %m +The month as a decimal number (range @code{01} through @code{12}). + +@item %M +The minute as a decimal number (range @code{00} through @code{59}). + +@item %n +A single @samp{\n} (newline) character. 
+ +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %p +Either @samp{AM} or @samp{PM}, according to the given time value; or the +corresponding strings for the current locale. Noon is treated as +@samp{PM} and midnight as @samp{AM}. In most locales +@samp{AM}/@samp{PM} format is not supported, in such cases @code{"%p"} +yields an empty string. + +@ignore +We currently have a problem with makeinfo. Write @samp{AM} and @samp{am} +both results in `am'. I.e., the difference in case is not visible anymore. +@end ignore +@item %P +Either @samp{am} or @samp{pm}, according to the given time value; or the +corresponding strings for the current locale, printed in lowercase +characters. Noon is treated as @samp{pm} and midnight as @samp{am}. In +most locales @samp{AM}/@samp{PM} format is not supported, in such cases +@code{"%P"} yields an empty string. + +This format is a GNU extension. + +@item %r +The complete calendar time using the AM/PM format of the current locale. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. +In the POSIX locale, this format is equivalent to @code{%I:%M:%S %p}. + +@item %R +The hour and minute in decimal numbers using the format @code{%H:%M}. + +This format was first standardized by @w{ISO C99} and by POSIX.1-2001 +but was previously available as a GNU extension. + +@item %s +The number of seconds since the epoch, i.e., since 1970-01-01 00:00:00 UTC. +Leap seconds are not counted unless leap second support is available. + +This format is a GNU extension. + +@item %S +The seconds as a decimal number (range @code{00} through @code{60}). + +@item %t +A single @samp{\t} (tabulator) character. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %T +The time of day using decimal numbers using the format @code{%H:%M:%S}. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. 
+ +@item %u +The day of the week as a decimal number (range @code{1} through +@code{7}), Monday being @code{1}. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %U +The week number of the current year as a decimal number (range @code{00} +through @code{53}), starting with the first Sunday as the first day of +the first week. Days preceding the first Sunday in the year are +considered to be in week @code{00}. + +@item %V +The @w{ISO 8601:1988} week number as a decimal number (range @code{01} +through @code{53}). ISO weeks start with Monday and end with Sunday. +Week @code{01} of a year is the first week which has the majority of its +days in that year; this is equivalent to the week containing the year's +first Thursday, and it is also equivalent to the week containing January +4. Week @code{01} of a year can contain days from the previous year. +The week before week @code{01} of a year is the last week (@code{52} or +@code{53}) of the previous year even if it contains days from the new +year. + +This format was first standardized by POSIX.2-1992 and by @w{ISO C99}. + +@item %w +The day of the week as a decimal number (range @code{0} through +@code{6}), Sunday being @code{0}. + +@item %W +The week number of the current year as a decimal number (range @code{00} +through @code{53}), starting with the first Monday as the first day of +the first week. All days preceding the first Monday in the year are +considered to be in week @code{00}. + +@item %x +The preferred date representation for the current locale. + +@item %X +The preferred time of day representation for the current locale. + +@item %y +The year without a century as a decimal number (range @code{00} through +@code{99}). This is equivalent to the year modulo 100. + +@item %Y +The year as a decimal number, using the Gregorian calendar. Years +before the year @code{1} are numbered @code{0}, @code{-1}, and so on. 
+ +@item %z +@w{RFC 822}/@w{ISO 8601:1988} style numeric time zone (e.g., +@code{-0600} or @code{+0100}), or nothing if no time zone is +determinable. + +This format was first standardized by @w{ISO C99} and by POSIX.1-2001 +but was previously available as a GNU extension. + +In the POSIX locale, a full @w{RFC 822} timestamp is generated by the format +@w{@samp{"%a, %d %b %Y %H:%M:%S %z"}} (or the equivalent +@w{@samp{"%a, %d %b %Y %T %z"}}). + +@item %Z +The time zone abbreviation (empty if the time zone can't be determined). + +@item %% +A literal @samp{%} character. +@end table + +The @var{size} parameter can be used to specify the maximum number of +characters to be stored in the array @var{s}, including the terminating +null character. If the formatted time requires more than @var{size} +characters, @code{strftime} returns zero and the contents of the array +@var{s} are undefined. Otherwise the return value indicates the +number of characters placed in the array @var{s}, not including the +terminating null character. + +@emph{Warning:} This convention for the return value which is prescribed +in @w{ISO C} can lead to problems in some situations. For certain +format strings and certain locales the output really can be the empty +string and this cannot be discovered by testing the return value only. +E.g., in most locales the AM/PM time format is not supported (most of +the world uses the 24 hour time representation). In such locales +@code{"%p"} will return the empty string, i.e., the return value is +zero. To detect situations like this something similar to the following +code should be used: + +@smallexample +buf[0] = '\1'; +len = strftime (buf, bufsize, format, tp); +if (len == 0 && buf[0] != '\0') + @{ + /* Something went wrong in the strftime call. */ + @dots{} + @} +@end smallexample + +If @var{s} is a null pointer, @code{strftime} does not actually write +anything, but instead returns the number of characters it would have written. 
+ +Calling @code{strftime} also sets the current time zone as if +@code{tzset} were called; @code{strftime} uses this information +instead of @var{brokentime}'s @code{tm_gmtoff} and @code{tm_zone} +members. @xref{Time Zone Functions}. + +For an example of @code{strftime}, see @ref{Time Functions Example}. +@end deftypefun + +@comment time.h +@comment ISO/Amend1 +@deftypefun size_t wcsftime (wchar_t *@var{s}, size_t @var{size}, const wchar_t *@var{template}, const struct tm *@var{brokentime}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@asucorrupt{} @ascuheap{} @asulock{} @ascudlopen{}}@acunsafe{@acucorrupt{} @aculock{} @acsmem{} @acsfd{}}} +@c wcsftime @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c wcsftime_l @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c wcsftime_internal @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c add ok +@c memset_zero dup ok +@c memset_space dup ok +@c wcslen dup ok +@c cpy ok +@c add dup ok +@c memcpy_lowcase ok +@c TOLOWER ok +@c towlower_l dup ok +@c memcpy_uppcase ok +@c TOUPPER ok +@c towupper_l dup ok +@c MEMCPY ok +@c wmemcpy dup ok +@c widen @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c memset dup ok +@c mbsrtowcs_l @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd [no @mtasurace:mbstate/!ps] +@c ISDIGIT ok +@c STRLEN ok +@c wcslen dup ok +@c wcsftime_internal dup @mtsenv @mtslocale @asucorrupt @ascuheap @asulock @ascudlopen @acucorrupt @aculock @acsmem @acsfd +@c TOUPPER dup ok +@c nl_get_era_entry dup @ascuheap @asulock @acsmem @aculock +@c DO_NUMBER ok +@c DO_NUMBER_SPACEPAD ok +@c nl_get_walt_digit dup @ascuheap @asulock @acsmem @aculock +@c libc_rwlock_wrlock dup @asulock @aculock +@c nl_init_alt_digit dup @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c memset dup ok +@c wcschr 
dup ok +@c libc_rwlock_unlock dup @aculock +@c memset_space ok +@c wmemset dup ok +@c memset_zero ok +@c wmemset dup ok +@c mktime dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c iso_week_days ok +@c isleap ok +@c tzset dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c gmtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c tm_diff ok +The @code{wcsftime} function is equivalent to the @code{strftime} +function with the difference that it operates on wide character +strings. The buffer where the result is stored, pointed to by @var{s}, +must be an array of wide characters. The parameter @var{size} which +specifies the size of the output buffer gives the number of wide +characters, not the number of bytes. + +Also the format string @var{template} is a wide character string. Since +all characters needed to specify the format string are in the basic +character set it is portably possible to write format strings in the C +source code using the @code{L"@dots{}"} notation. The parameter +@var{brokentime} has the same meaning as in the @code{strftime} call. + +The @code{wcsftime} function supports the same flags, modifiers, and +format specifiers as the @code{strftime} function. + +The return value of @code{wcsftime} is the number of wide characters +stored in @code{s}. When more characters would have to be written than +can be placed in the buffer @var{s} the return value is zero, with the +same problems indicated in the @code{strftime} documentation. +@end deftypefun + +@node Parsing Date and Time +@subsection Convert textual time and date information back + +The @w{ISO C} standard does not specify any functions which can convert +the output of the @code{strftime} function back into a binary format. +This led to a variety of more-or-less successful implementations with +different interfaces over the years. 
Then the Unix standard was +extended by the addition of two functions: @code{strptime} and +@code{getdate}. Both have strange interfaces but at least they are +widely available. + +@menu +* Low-Level Time String Parsing:: Interpret string according to given format. +* General Time String Parsing:: User-friendly function to parse date and + time strings. +@end menu + +@node Low-Level Time String Parsing +@subsubsection Interpret string according to given format + +The first function is rather low-level. It is nevertheless frequently +used in software since it is better known. Its interface and +implementation are heavily influenced by the @code{getdate} function, +which is defined and implemented in terms of calls to @code{strptime}. + +@comment time.h +@comment XPG4 +@deftypefun {char *} strptime (const char *@var{s}, const char *@var{fmt}, struct tm *@var{tp}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c strptime @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c strptime_internal @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c memset dup ok +@c ISSPACE ok +@c isspace_l dup ok +@c match_char ok +@c match_string ok +@c strlen dup ok +@c strncasecmp_l dup ok +@c strcmp dup ok +@c recursive @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c strptime_internal dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c get_number ok +@c ISSPACE dup ok +@c localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c nl_select_era_entry @ascuheap @asulock @acsmem @aculock +@c nl_init_era_entries dup @ascuheap @asulock @acsmem @aculock +@c get_alt_number dup @ascuheap @asulock @acsmem @aculock +@c nl_parse_alt_digit dup @ascuheap @asulock @acsmem @aculock +@c libc_rwlock_wrlock dup @asulock @aculock +@c nl_init_alt_digit dup @ascuheap @acsmem +@c libc_rwlock_unlock dup @aculock +@c get_number dup ok +@c
day_of_the_week ok +@c day_of_the_year ok +The @code{strptime} function parses the input string @var{s} according +to the format string @var{fmt} and stores its results in the +structure @var{tp}. + +The input string could be generated by a @code{strftime} call or +obtained any other way. It does not need to be in a human-recognizable +format; e.g. a date passed as @code{"02:1999:9"} is acceptable, even +though it is ambiguous without context. As long as the format string +@var{fmt} matches the input string the function will succeed. + +The user has to make sure, though, that the input can be parsed in an +unambiguous way. The string @code{"1999112"} can be parsed using the +format @code{"%Y%m%d"} as 1999-1-12, 1999-11-2, or even 19991-1-2. It +is necessary to add appropriate separators to reliably get results. + +The format string consists of the same components as the format string +of the @code{strftime} function. The only difference is that the flags +@code{_}, @code{-}, @code{0}, and @code{^} are not allowed. +@comment Is this really the intention? --drepper +Several of the distinct formats of @code{strftime} do the same work in +@code{strptime} since differences like case of the input do not matter. +For reasons of symmetry all formats are supported, though. + +The modifiers @code{E} and @code{O} are also allowed everywhere the +@code{strftime} function allows them. + +The formats are: + +@table @code +@item %a +@itemx %A +The weekday name according to the current locale, in abbreviated form or +the full name. + +@item %b +@itemx %B +@itemx %h +The month name according to the current locale, in abbreviated form or +the full name. + +@item %c +The date and time representation for the current locale. + +@item %Ec +Like @code{%c} but the locale's alternative date and time format is used. + +@item %C +The century of the year. + +It makes sense to use this format only if the format string also +contains the @code{%y} format.
+ +@item %EC +The locale's representation of the period. + +Unlike @code{%C} it sometimes makes sense to use this format since some +cultures represent years relative to the beginning of eras instead of +using the Gregorian years. + +@item %d +@itemx %e +The day of the month as a decimal number (range @code{1} through @code{31}). +Leading zeroes are permitted but not required. + +@item %Od +@itemx %Oe +Same as @code{%d} but using the locale's alternative numeric symbols. + +Leading zeroes are permitted but not required. + +@item %D +Equivalent to @code{%m/%d/%y}. + +@item %F +Equivalent to @code{%Y-%m-%d}, which is the @w{ISO 8601} date +format. + +This is a GNU extension following an @w{ISO C99} extension to +@code{strftime}. + +@item %g +The year corresponding to the ISO week number, but without the century +(range @code{00} through @code{99}). + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +This format is a GNU extension following a GNU extension of @code{strftime}. + +@item %G +The year corresponding to the ISO week number. + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +This format is a GNU extension following a GNU extension of @code{strftime}. + +@item %H +@itemx %k +The hour as a decimal number, using a 24-hour clock (range @code{00} through +@code{23}). + +@code{%k} is a GNU extension following a GNU extension of @code{strftime}. + +@item %OH +Same as @code{%H} but using the locale's alternative numeric symbols. + +@item %I +@itemx %l +The hour as a decimal number, using a 12-hour clock (range @code{01} through +@code{12}). + +@code{%l} is a GNU extension following a GNU extension of @code{strftime}. + +@item %OI +Same as @code{%I} but using the locale's alternative numeric symbols. + +@item %j +The day of the year as a decimal number (range @code{1} through @code{366}).
+ +Leading zeroes are permitted but not required. + +@item %m +The month as a decimal number (range @code{1} through @code{12}). + +Leading zeroes are permitted but not required. + +@item %Om +Same as @code{%m} but using the locale's alternative numeric symbols. + +@item %M +The minute as a decimal number (range @code{0} through @code{59}). + +Leading zeroes are permitted but not required. + +@item %OM +Same as @code{%M} but using the locale's alternative numeric symbols. + +@item %n +@itemx %t +Matches any white space. + +@item %p +@itemx %P +The locale-dependent equivalent to @samp{AM} or @samp{PM}. + +This format is not useful unless @code{%I} or @code{%l} is also used. +Another complication is that the locale might not define these values at +all and therefore the conversion fails. + +@code{%P} is a GNU extension following a GNU extension to @code{strftime}. + +@item %r +The complete time using the AM/PM format of the current locale. + +A complication is that the locale might not define this format at all +and therefore the conversion fails. + +@item %R +The hour and minute in decimal numbers using the format @code{%H:%M}. + +@code{%R} is a GNU extension following a GNU extension to @code{strftime}. + +@item %s +The number of seconds since the epoch, i.e., since 1970-01-01 00:00:00 UTC. +Leap seconds are not counted unless leap second support is available. + +@code{%s} is a GNU extension following a GNU extension to @code{strftime}. + +@item %S +The seconds as a decimal number (range @code{0} through @code{60}). + +Leading zeroes are permitted but not required. + +@strong{NB:} The Unix specification says the upper bound on this value +is @code{61}, a result of a decision to allow double leap seconds. You +will not see the value @code{61} because no minute has more than one +leap second, but the myth persists. + +@item %OS +Same as @code{%S} but using the locale's alternative numeric symbols. + +@item %T +Equivalent to the use of @code{%H:%M:%S} in this place.
+ +@item %u +The day of the week as a decimal number (range @code{1} through +@code{7}), Monday being @code{1}. + +Leading zeroes are permitted but not required. + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +@item %U +The week number of the current year as a decimal number (range @code{0} +through @code{53}). + +Leading zeroes are permitted but not required. + +@item %OU +Same as @code{%U} but using the locale's alternative numeric symbols. + +@item %V +The @w{ISO 8601:1988} week number as a decimal number (range @code{1} +through @code{53}). + +Leading zeroes are permitted but not required. + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +@item %w +The day of the week as a decimal number (range @code{0} through +@code{6}), Sunday being @code{0}. + +Leading zeroes are permitted but not required. + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +@item %Ow +Same as @code{%w} but using the locale's alternative numeric symbols. + +@item %W +The week number of the current year as a decimal number (range @code{0} +through @code{53}). + +Leading zeroes are permitted but not required. + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +@item %OW +Same as @code{%W} but using the locale's alternative numeric symbols. + +@item %x +The date using the locale's date format. + +@item %Ex +Like @code{%x} but the locale's alternative date representation is used. + +@item %X +The time using the locale's time format. + +@item %EX +Like @code{%X} but the locale's alternative time representation is used. + +@item %y +The year without a century as a decimal number (range @code{0} through +@code{99}).
+ +Leading zeroes are permitted but not required. + +Note that it is questionable to use this format without +the @code{%C} format. The @code{strptime} function does regard input +values in the range @math{69} to @math{99} as the years @math{1969} to +@math{1999} and the values @math{0} to @math{68} as the years +@math{2000} to @math{2068}. But maybe this heuristic fails for some +input data. + +Therefore it is best to avoid @code{%y} completely and use @code{%Y} +instead. + +@item %Ey +The offset from @code{%EC} in the locale's alternative representation. + +@item %Oy +The offset of the year (from @code{%C}) using the locale's alternative +numeric symbols. + +@item %Y +The year as a decimal number, using the Gregorian calendar. + +@item %EY +The full alternative year representation. + +@item %z +The offset from GMT in @w{ISO 8601}/RFC822 format. + +@item %Z +The timezone name. + +@emph{Note:} Currently, this is not fully implemented. The format is +recognized, input is consumed but no field in @var{tm} is set. + +@item %% +A literal @samp{%} character. +@end table + +All other characters in the format string must have a matching character +in the input string. Exceptions are white spaces in the input string +which can match zero or more whitespace characters in the format string. + +@strong{Portability Note:} The XPG standard advises applications to use +at least one whitespace character (as specified by @code{isspace}) or +other non-alphanumeric characters between any two conversion +specifications. @Theglibc{} does not have this limitation but +other libraries might have trouble parsing formats like +@code{"%d%m%Y%H%M%S"}. + +The @code{strptime} function processes the input string from left to +right. Each of the three possible input elements (white space, literal, +or format) is handled one after the other. If the input cannot be +matched to the format string the function stops. The remainder of the +format and input strings are not processed.
+ +The function returns a pointer to the first character it was unable to +process. If the input string contains more characters than required by +the format string the return value points right after the last consumed +input character. If the whole input string is consumed the return value +points to the @code{NULL} byte at the end of the string. If an error +occurs, i.e., @code{strptime} fails to match all of the format string, +the function returns @code{NULL}. +@end deftypefun + +The specification of the function in the XPG standard is rather vague, +leaving out a few important pieces of information. Most importantly, it +does not specify what happens to those elements of @var{tm} which are +not directly initialized by the different formats. The +implementations on different Unix systems vary here. + +The @glibcadj{} implementation does not touch those fields which are not +directly initialized. Exceptions are the @code{tm_wday} and +@code{tm_yday} elements, which are recomputed if any of the year, month, +or date elements changed. This has two implications: + +@itemize @bullet +@item +Before calling the @code{strptime} function for a new input string, you +should prepare the @var{tm} structure you pass. Normally this will mean +initializing all values to zero. Alternatively, you can set all +fields to values like @code{INT_MAX}, allowing you to determine which +elements were set by the function call. Zero does not work here since +it is a valid value for many of the fields. + +Careful initialization is necessary if you want to find out whether a +certain field in @var{tm} was initialized by the function call. + +@item +You can construct a @code{struct tm} value with several consecutive +@code{strptime} calls. A useful application of this is e.g. the parsing +of two separate strings, one containing date information and the other +time information. 
By parsing one after the other without clearing the +structure in-between, you can construct a complete broken-down time. +@end itemize + +The following example shows a function which parses a string which +contains the date information in either US style or @w{ISO 8601} form: + +@smallexample +const char * +parse_date (const char *input, struct tm *tm) +@{ + const char *cp; + + /* @r{First clear the result structure.} */ + memset (tm, '\0', sizeof (*tm)); + + /* @r{Try the ISO format first.} */ + cp = strptime (input, "%F", tm); + if (cp == NULL) + @{ + /* @r{Does not match. Try the US form.} */ + cp = strptime (input, "%D", tm); + @} + + return cp; +@} +@end smallexample + +@node General Time String Parsing +@subsubsection A More User-friendly Way to Parse Times and Dates + +The Unix standard defines another function for parsing date strings. +The interface is weird, but if the function happens to suit your +application it is just fine. It is problematic to use this function +in multi-threaded programs or libraries, since it returns a pointer to +a static variable, and uses a global variable and global state (an +environment variable). + +@comment time.h +@comment Unix98 +@defvar getdate_err +This variable of type @code{int} contains the error code of the last +unsuccessful call to @code{getdate}. Defined values are: + +@table @math +@item 1 +The environment variable @code{DATEMSK} is not defined or null. +@item 2 +The template file denoted by the @code{DATEMSK} environment variable +cannot be opened. +@item 3 +Information about the template file cannot be retrieved. +@item 4 +The template file is not a regular file. +@item 5 +An I/O error occurred while reading the template file. +@item 6 +Not enough memory available to execute the function. +@item 7 +The template file contains no matching template. +@item 8 +The input date is invalid, but would match a template otherwise.
This +includes dates like February 31st, and dates which cannot be represented +in a @code{time_t} variable. +@end table +@end defvar + +@comment time.h +@comment Unix98 +@deftypefun {struct tm *} getdate (const char *@var{string}) +@safety{@prelim{}@mtunsafe{@mtasurace{:getdate} @mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c getdate @mtasurace:getdate @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c getdate_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +The interface to @code{getdate} is the simplest possible for a function +to parse a string and return the value. @var{string} is the input +string and the result is returned in a statically-allocated variable. + +The details about how the string is processed are hidden from the user. +In fact, they can be outside the control of the program. Which formats +are recognized is controlled by the file named by the environment +variable @code{DATEMSK}. This file should contain +lines of valid format strings which could be passed to @code{strptime}. + +The @code{getdate} function reads these format strings one after the +other and tries to match the input string. The first line which +completely matches the input string is used. + +Elements not initialized through the format string retain the values +present at the time of the @code{getdate} function call. + +The formats recognized by @code{getdate} are the same as for +@code{strptime}. See above for an explanation. There are only a few +extensions to the @code{strptime} behavior: + +@itemize @bullet +@item +If the @code{%Z} format is given the broken-down time is based on the +current time of the timezone matched, not of the current timezone of the +runtime environment. + +@emph{Note}: This is not implemented (currently). The problem is that +timezone names are not unique. 
If a fixed timezone is assumed for a +given string (say @code{EST} meaning US East Coast time), then uses for +countries other than the USA will fail. So far we have found no good +solution to this. + +@item +If only the weekday is specified the selected day depends on the current +date. If the current weekday is greater than or equal to the @code{tm_wday} +value the current week's day is chosen, otherwise the day next week is chosen. + +@item +A similar heuristic is used when only the month is given and not the +year. If the month is greater than or equal to the current month, then +the current year is used. Otherwise it wraps to next year. The first +day of the month is assumed if one is not explicitly specified. + +@item +The current hour, minute, and second are used if the appropriate value is +not set through the format. + +@item +If no date is given tomorrow's date is used if the time is +smaller than the current time. Otherwise today's date is taken. +@end itemize + +It should be noted that the format in the template file need not only +contain format elements. The following is a list of possible format +strings (taken from the Unix standard): + +@smallexample +%m +%A %B %d, %Y %H:%M:%S +%A +%B +%m/%d/%y %I %p +%d,%m,%Y %H:%M +at %A the %dst of %B in %Y +run job at %I %p,%B %dnd +%A den %d. %B %Y %H.%M Uhr +@end smallexample + +As you can see, the template list can contain very specific strings like +@code{run job at %I %p,%B %dnd}. Using the above list of templates and +assuming the current time is Mon Sep 22 12:19:47 EDT 1986, we can obtain the +following results for the given input. 
+ +@multitable {xxxxxxxxxxxx} {xxxxxxxxxx} {xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx} +@item Input @tab Match @tab Result +@item Mon @tab %a @tab Mon Sep 22 12:19:47 EDT 1986 +@item Sun @tab %a @tab Sun Sep 28 12:19:47 EDT 1986 +@item Fri @tab %a @tab Fri Sep 26 12:19:47 EDT 1986 +@item September @tab %B @tab Mon Sep 1 12:19:47 EDT 1986 +@item January @tab %B @tab Thu Jan 1 12:19:47 EST 1987 +@item December @tab %B @tab Mon Dec 1 12:19:47 EST 1986 +@item Sep Mon @tab %b %a @tab Mon Sep 1 12:19:47 EDT 1986 +@item Jan Fri @tab %b %a @tab Fri Jan 2 12:19:47 EST 1987 +@item Dec Mon @tab %b %a @tab Mon Dec 1 12:19:47 EST 1986 +@item Jan Wed 1989 @tab %b %a %Y @tab Wed Jan 4 12:19:47 EST 1989 +@item Fri 9 @tab %a %H @tab Fri Sep 26 09:00:00 EDT 1986 +@item Feb 10:30 @tab %b %H:%S @tab Sun Feb 1 10:00:30 EST 1987 +@item 10:30 @tab %H:%M @tab Tue Sep 23 10:30:00 EDT 1986 +@item 13:30 @tab %H:%M @tab Mon Sep 22 13:30:00 EDT 1986 +@end multitable + +The return value of the function is a pointer to a static variable of +type @w{@code{struct tm}}, or a null pointer if an error occurred. The +result is only valid until the next @code{getdate} call, making this +function unusable in multi-threaded applications. + +The @code{errno} variable is @emph{not} changed. Error conditions are +stored in the global variable @code{getdate_err}. See the +description above for a list of the possible error values. + +@emph{Warning:} The @code{getdate} function should @emph{never} be +used in SUID-programs. The reason is obvious: using the +@code{DATEMSK} environment variable you can get the function to open +any arbitrary file and chances are high that with some bogus input +(such as a binary file) the program will crash. 
+@end deftypefun + +@comment time.h +@comment GNU +@deftypefun int getdate_r (const char *@var{string}, struct tm *@var{tp}) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c getdate_r @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c getenv dup @mtsenv +@c stat64 dup ok +@c access dup ok +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking dup ok [no @mtasurace:stream @asulock, exclusive] +@c isspace dup @mtslocale +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +@c memcpy dup ok +@c getline dup @ascuheap @acsmem [no @asucorrupt @aculock @acucorrupt, exclusive] +@c strptime dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c feof_unlocked dup ok +@c free dup @ascuheap @acsmem +@c ferror_unlocked dup dup ok +@c time dup ok +@c localtime_r dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c first_wday @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c memset dup ok +@c mktime dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c check_mday ok +@c mktime dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +The @code{getdate_r} function is the reentrant counterpart of +@code{getdate}. It does not use the global variable @code{getdate_err} +to signal an error, but instead returns an error code. The same error +codes as described in the @code{getdate_err} documentation above are +used, with 0 meaning success. + +Moreover, @code{getdate_r} stores the broken-down time in the variable +of type @code{struct tm} pointed to by the second argument, rather than +in a static variable. + +This function is not defined in the Unix standard. Nevertheless it is +available on some other Unix systems as well. + +The warning against using @code{getdate} in SUID-programs applies to +@code{getdate_r} as well. 
+@end deftypefun + +@node TZ Variable +@subsection Specifying the Time Zone with @code{TZ} + +In POSIX systems, a user can specify the time zone by means of the +@code{TZ} environment variable. For information about how to set +environment variables, see @ref{Environment Variables}. The functions +for accessing the time zone are declared in @file{time.h}. +@pindex time.h +@cindex time zone + +You should not normally need to set @code{TZ}. If the system is +configured properly, the default time zone will be correct. You might +set @code{TZ} if you are using a computer over a network from a +different time zone, and would like times reported to you in the time +zone local to you, rather than what is local to the computer. + +In POSIX.1 systems the value of the @code{TZ} variable can be in one of +three formats. With @theglibc{}, the most common format is the +last one, which can specify a selection from a large database of time +zone information for many regions of the world. The first two formats +are used to describe the time zone information directly, which is both +more cumbersome and less precise. But the POSIX.1 standard only +specifies the details of the first two formats, so it is good to be +familiar with them in case you come across a POSIX.1 system that doesn't +support a time zone information database. + +The first format is used when there is no Daylight Saving Time (or +summer time) in the local time zone: + +@smallexample +@r{@var{std} @var{offset}} +@end smallexample + +The @var{std} string specifies the name of the time zone. It must be +three or more characters long and must not contain a leading colon, +embedded digits, commas, nor plus and minus signs. There is no space +character separating the time zone name from the @var{offset}, so these +restrictions are necessary to parse the specification correctly. + +The @var{offset} specifies the time value you must add to the local time +to get a Coordinated Universal Time value. 
It has syntax like +[@code{+}|@code{-}]@var{hh}[@code{:}@var{mm}[@code{:}@var{ss}]]. This +is positive if the local time zone is west of the Prime Meridian and +negative if it is east. The hour must be between @code{0} and +@code{24}, and the minute and seconds between @code{0} and @code{59}. + +For example, here is how we would specify Eastern Standard Time, but +without any Daylight Saving Time alternative: + +@smallexample +EST+5 +@end smallexample + +The second format is used when there is Daylight Saving Time: + +@smallexample +@r{@var{std} @var{offset} @var{dst} [@var{offset}]@code{,}@var{start}[@code{/}@var{time}]@code{,}@var{end}[@code{/}@var{time}]} +@end smallexample + +The initial @var{std} and @var{offset} specify the standard time zone, as +described above. The @var{dst} string and @var{offset} specify the name +and offset for the corresponding Daylight Saving Time zone; if the +@var{offset} is omitted, it defaults to one hour ahead of standard time. + +The remainder of the specification describes when Daylight Saving Time is +in effect. The @var{start} field is when Daylight Saving Time goes into +effect and the @var{end} field is when the change is made back to standard +time. The following formats are recognized for these fields: + +@table @code +@item J@var{n} +This specifies the Julian day, with @var{n} between @code{1} and @code{365}. +February 29 is never counted, even in leap years. + +@item @var{n} +This specifies the Julian day, with @var{n} between @code{0} and @code{365}. +February 29 is counted in leap years. + +@item M@var{m}.@var{w}.@var{d} +This specifies day @var{d} of week @var{w} of month @var{m}. The day +@var{d} must be between @code{0} (Sunday) and @code{6}. The week +@var{w} must be between @code{1} and @code{5}; week @code{1} is the +first week in which day @var{d} occurs, and week @code{5} specifies the +@emph{last} @var{d} day in the month. The month @var{m} should be +between @code{1} and @code{12}. 
+@end table + +The @var{time} fields specify when, in the local time currently in +effect, the change to the other time occurs. If omitted, the default is +@code{02:00:00}. The hours part of the time fields can range from +@minus{}167 through 167; this is an extension to POSIX.1, which allows +only the range 0 through 24. + +Here are some example @code{TZ} values, including the appropriate +Daylight Saving Time and its dates of applicability. In North +American Eastern Standard Time (EST) and Eastern Daylight Time (EDT), +the normal offset from UTC is 5 hours; since this is +west of the prime meridian, the sign is positive. Summer time begins on +March's second Sunday at 2:00am, and ends on November's first Sunday +at 2:00am. + +@smallexample +EST+5EDT,M3.2.0/2,M11.1.0/2 +@end smallexample + +Israel Standard Time (IST) and Israel Daylight Time (IDT) are 2 hours +ahead of the prime meridian in winter, springing forward an hour on +March's fourth Thursday at 26:00 (i.e., 02:00 on the first Friday on or +after March 23), and falling back on October's last Sunday at 02:00. + +@smallexample +IST-2IDT,M3.4.4/26,M10.5.0 +@end smallexample + +Western Argentina Summer Time (WARST) is 3 hours behind the prime +meridian all year. There is a dummy fall-back transition on December +31 at 25:00 daylight saving time (i.e., 24:00 standard time, +equivalent to January 1 at 00:00 standard time), and a simultaneous +spring-forward transition on January 1 at 00:00 standard time, so +daylight saving time is in effect all year and the initial @code{WART} +is a placeholder. + +@smallexample +WART4WARST,J1/0,J365/25 +@end smallexample + +Western Greenland Time (WGT) and Western Greenland Summer Time (WGST) +are 3 hours behind UTC in the winter. Its clocks follow the European +Union rules of springing forward by one hour on March's last Sunday at +01:00 UTC (@minus{}02:00 local time) and falling back on October's +last Sunday at 01:00 UTC (@minus{}01:00 local time). 
+ +@smallexample +WGT3WGST,M3.5.0/-2,M10.5.0/-1 +@end smallexample + +The schedule of Daylight Saving Time in any particular jurisdiction has +changed over the years. To be strictly correct, the conversion of dates +and times in the past should be based on the schedule that was in effect +then. However, this format has no facilities to let you specify how the +schedule has changed from year to year. The most you can do is specify +one particular schedule---usually the present day schedule---and this is +used to convert any date, no matter when. For precise time zone +specifications, it is best to use the time zone information database +(see below). + +The third format looks like this: + +@smallexample +:@var{characters} +@end smallexample + +Each operating system interprets this format differently; in +@theglibc{}, @var{characters} is the name of a file which describes the time +zone. + +@pindex /etc/localtime +@pindex localtime +If the @code{TZ} environment variable does not have a value, the +operation chooses a time zone by default. In @theglibc{}, the +default time zone is like the specification @samp{TZ=:/etc/localtime} +(or @samp{TZ=:/usr/local/etc/localtime}, depending on how @theglibc{} +was configured; @pxref{Installation}). Other C libraries use their own +rule for choosing the default time zone, so there is little we can say +about them. + +@cindex time zone database +@pindex /usr/share/zoneinfo +@pindex zoneinfo +If @var{characters} begins with a slash, it is an absolute file name; +otherwise the library looks for the file +@w{@file{/usr/share/zoneinfo/@var{characters}}}. The @file{zoneinfo} +directory contains data files describing local time zones in many +different parts of the world. The names represent major cities, with +subdirectories for geographical areas; for example, +@file{America/New_York}, @file{Europe/London}, @file{Asia/Hong_Kong}. 
+These data files are installed by the system administrator, who also +sets @file{/etc/localtime} to point to the data file for the local time +zone. The files typically come from the @url{http://www.iana.org/time-zones, +Time Zone Database} of time zone and daylight saving time +information for most regions of the world, which is maintained by a +community of volunteers and put in the public domain. + +@node Time Zone Functions +@subsection Functions and Variables for Time Zones + +@comment time.h +@comment POSIX.1 +@deftypevar {char *} tzname [2] +The array @code{tzname} contains two strings, which are the standard +names of the pair of time zones (standard and Daylight +Saving) that the user has selected. @code{tzname[0]} is the name of +the standard time zone (for example, @code{"EST"}), and @code{tzname[1]} +is the name for the time zone when Daylight Saving Time is in use (for +example, @code{"EDT"}). These correspond to the @var{std} and @var{dst} +strings (respectively) from the @code{TZ} environment variable. If +Daylight Saving Time is never used, @code{tzname[1]} is the empty string. + +The @code{tzname} array is initialized from the @code{TZ} environment +variable whenever @code{tzset}, @code{ctime}, @code{strftime}, +@code{mktime}, or @code{localtime} is called. If multiple abbreviations +have been used (e.g. @code{"EWT"} and @code{"EDT"} for U.S. Eastern War +Time and Eastern Daylight Time), the array contains the most recent +abbreviation. + +The @code{tzname} array is required for POSIX.1 compatibility, but in +GNU programs it is better to use the @code{tm_zone} member of the +broken-down time structure, since @code{tm_zone} reports the correct +abbreviation even when it is not the latest one. + +Though the strings are declared as @code{char *} the user must refrain +from modifying these strings. Modifying the strings will almost certainly +lead to trouble. 
+ +@end deftypevar + +@comment time.h +@comment POSIX.1 +@deftypefun void tzset (void) +@safety{@prelim{}@mtsafe{@mtsenv{} @mtslocale{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c tzset @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c tzset_internal dup @mtsenv @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_unlock dup @aculock +The @code{tzset} function initializes the @code{tzname} variable from +the value of the @code{TZ} environment variable. It is not usually +necessary for your program to call this function, because it is called +automatically when you use the other time conversion functions that +depend on the time zone. +@end deftypefun + +The following variables are defined for compatibility with System V +Unix. Like @code{tzname}, these variables are set by calling +@code{tzset} or the other time conversion functions. + +@comment time.h +@comment SVID +@deftypevar {long int} timezone +This contains the difference between UTC and the latest local standard +time, in seconds west of UTC. For example, in the U.S. Eastern time +zone, the value is @code{5*60*60}. Unlike the @code{tm_gmtoff} member +of the broken-down time structure, this value is not adjusted for +daylight saving, and its sign is reversed. In GNU programs it is better +to use @code{tm_gmtoff}, since it contains the correct offset even when +it is not the latest one. +@end deftypevar + +@comment time.h +@comment SVID +@deftypevar int daylight +This variable has a nonzero value if Daylight Saving Time rules apply. +A nonzero value does not necessarily mean that Daylight Saving Time is +now in effect; it means only that Daylight Saving Time is sometimes in +effect. +@end deftypevar + +@node Time Functions Example +@subsection Time Functions Example + +Here is an example program showing the use of some of the calendar time +functions. 
+ +@smallexample +@include strftim.c.texi +@end smallexample + +It produces output like this: + +@smallexample +Wed Jul 31 13:02:36 1991 +Today is Wednesday, July 31. +The time is 01:02 PM. +@end smallexample + + +@node Setting an Alarm +@section Setting an Alarm + +The @code{alarm} and @code{setitimer} functions provide a mechanism for a +process to interrupt itself in the future. They do this by setting a +timer; when the timer expires, the process receives a signal. + +@cindex setting an alarm +@cindex interval timer, setting +@cindex alarms, setting +@cindex timers, setting +Each process has three independent interval timers available: + +@itemize @bullet +@item +A real-time timer that counts elapsed time. This timer sends a +@code{SIGALRM} signal to the process when it expires. +@cindex real-time timer +@cindex timer, real-time + +@item +A virtual timer that counts processor time used by the process. This timer +sends a @code{SIGVTALRM} signal to the process when it expires. +@cindex virtual timer +@cindex timer, virtual + +@item +A profiling timer that counts both processor time used by the process, +and processor time spent in system calls on behalf of the process. This +timer sends a @code{SIGPROF} signal to the process when it expires. +@cindex profiling timer +@cindex timer, profiling + +This timer is useful for profiling in interpreters. The interval timer +mechanism does not have the fine granularity necessary for profiling +native code. +@c @xref{profil} !!! +@end itemize + +You can only have one timer of each kind set at any given time. If you +set a timer that has not yet expired, that timer is simply reset to the +new value. + +You should establish a handler for the appropriate alarm signal using +@code{signal} or @code{sigaction} before issuing a call to +@code{setitimer} or @code{alarm}. Otherwise, an unusual chain of events +could cause the timer to expire before your program establishes the +handler. 
In this case it would be terminated, since termination is the +default action for the alarm signals. @xref{Signal Handling}. + +To be able to use the alarm function to interrupt a system call which +might block otherwise indefinitely it is important to @emph{not} set the +@code{SA_RESTART} flag when registering the signal handler using +@code{sigaction}. When not using @code{sigaction} things get even +uglier: the @code{signal} function has fixed semantics with respect +to restarts. The BSD semantics for this function is to set the flag. +Therefore, if @code{sigaction} for whatever reason cannot be used, it is +necessary to use @code{sysv_signal} and not @code{signal}. + +The @code{setitimer} function is the primary means for setting an alarm. +This facility is declared in the header file @file{sys/time.h}. The +@code{alarm} function, declared in @file{unistd.h}, provides a somewhat +simpler interface for setting the real-time timer. +@pindex unistd.h +@pindex sys/time.h + +@comment sys/time.h +@comment BSD +@deftp {Data Type} {struct itimerval} +This structure is used to specify when a timer should expire. It contains +the following members: +@table @code +@item struct timeval it_interval +This is the period between successive timer interrupts. If zero, the +alarm will only be sent once. + +@item struct timeval it_value +This is the period between now and the first timer interrupt. If zero, +the alarm is disabled. +@end table + +The @code{struct timeval} data type is described in @ref{Elapsed Time}. +@end deftp + +@comment sys/time.h +@comment BSD +@deftypefun int setitimer (int @var{which}, const struct itimerval *@var{new}, struct itimerval *@var{old}) +@safety{@prelim{}@mtsafe{@mtstimer{}}@assafe{}@acsafe{}} +@c This function is marked with @mtstimer because the same set of timers +@c is shared by all threads of a process, so calling it in one thread +@c may interfere with timers set by another thread. 
This interference +@c is not regarded as destructive, because the interface specification +@c makes this overriding while returning the previous value the expected +@c behavior, and the kernel will serialize concurrent calls so that the +@c last one prevails, with each call getting the timer information from +@c the timer installed by the previous call in that serialization. +The @code{setitimer} function sets the timer specified by @var{which} +according to @var{new}. The @var{which} argument can have a value of +@code{ITIMER_REAL}, @code{ITIMER_VIRTUAL}, or @code{ITIMER_PROF}. + +If @var{old} is not a null pointer, @code{setitimer} returns information +about any previous unexpired timer of the same kind in the structure it +points to. + +The return value is @code{0} on success and @code{-1} on failure. The +following @code{errno} error conditions are defined for this function: + +@table @code +@item EINVAL +The timer period is too large. +@end table +@end deftypefun + +@comment sys/time.h +@comment BSD +@deftypefun int getitimer (int @var{which}, struct itimerval *@var{old}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getitimer} function stores information about the timer specified +by @var{which} in the structure pointed at by @var{old}. + +The return value and error conditions are the same as for @code{setitimer}. +@end deftypefun + +@vtable @code +@comment sys/time.h +@comment BSD +@item ITIMER_REAL +This constant can be used as the @var{which} argument to the +@code{setitimer} and @code{getitimer} functions to specify the real-time +timer. + +@comment sys/time.h +@comment BSD +@item ITIMER_VIRTUAL +This constant can be used as the @var{which} argument to the +@code{setitimer} and @code{getitimer} functions to specify the virtual +timer. + +@comment sys/time.h +@comment BSD +@item ITIMER_PROF +This constant can be used as the @var{which} argument to the +@code{setitimer} and @code{getitimer} functions to specify the profiling +timer. 
+@end vtable + +@comment unistd.h +@comment POSIX.1 +@deftypefun {unsigned int} alarm (unsigned int @var{seconds}) +@safety{@prelim{}@mtsafe{@mtstimer{}}@assafe{}@acsafe{}} +@c Wrapper for setitimer. +The @code{alarm} function sets the real-time timer to expire in +@var{seconds} seconds. If you want to cancel any existing alarm, you +can do this by calling @code{alarm} with a @var{seconds} argument of +zero. + +The return value indicates how many seconds remain before the previous +alarm would have been sent. If there was no previous alarm, @code{alarm} +returns zero. +@end deftypefun + +The @code{alarm} function could be defined in terms of @code{setitimer} +like this: + +@smallexample +unsigned int +alarm (unsigned int seconds) +@{ + struct itimerval old, new; + new.it_interval.tv_usec = 0; + new.it_interval.tv_sec = 0; + new.it_value.tv_usec = 0; + new.it_value.tv_sec = (long int) seconds; + if (setitimer (ITIMER_REAL, &new, &old) < 0) + return 0; + else + return old.it_value.tv_sec; +@} +@end smallexample + +There is an example showing the use of the @code{alarm} function in +@ref{Handler Returns}. + +If you simply want your process to wait for a given number of seconds, +you should use the @code{sleep} function. @xref{Sleeping}. + +You shouldn't count on the signal arriving precisely when the timer +expires. In a multiprocessing environment there is typically some +amount of delay involved. + +@strong{Portability Note:} The @code{setitimer} and @code{getitimer} +functions are derived from BSD Unix, while the @code{alarm} function is +specified by the POSIX.1 standard. @code{setitimer} is more powerful than +@code{alarm}, but @code{alarm} is more widely used. + +@node Sleeping +@section Sleeping + +The function @code{sleep} gives a simple way to make the program wait +for a short interval. If your program doesn't use signals (except to +terminate), then you can expect @code{sleep} to wait reliably throughout +the specified interval. 
Otherwise, @code{sleep} can return sooner if a +signal arrives; if you want to wait for a given interval regardless of +signals, use @code{select} (@pxref{Waiting for I/O}) and don't specify +any descriptors to wait for. +@c !!! select can get EINTR; using SA_RESTART makes sleep win too. + +@comment unistd.h +@comment POSIX.1 +@deftypefun {unsigned int} sleep (unsigned int @var{seconds}) +@safety{@prelim{}@mtunsafe{@mtascusig{:SIGCHLD/linux}}@asunsafe{}@acunsafe{}} +@c On Mach, it uses ports and calls time. On generic posix, it calls +@c nanosleep. On Linux, it temporarily blocks SIGCHLD, which is MT- and +@c AS-Unsafe, and in a way that makes it AC-Unsafe (C-unsafe, even!). +The @code{sleep} function waits for @var{seconds} seconds or until a signal +is delivered, whichever happens first. + +If @code{sleep} returns because the requested interval is over, +it returns a value of zero. If it returns because of delivery of a +signal, its return value is the remaining time in the sleep interval. + +The @code{sleep} function is declared in @file{unistd.h}. +@end deftypefun + +Resist the temptation to implement a sleep for a fixed amount of time by +using the return value of @code{sleep}, when nonzero, to call +@code{sleep} again. This will work with a certain amount of accuracy as +long as signals arrive infrequently. But each signal can cause the +eventual wakeup time to be off by an additional second or so. Suppose a +few signals happen to arrive in rapid succession by bad luck---there is +no limit on how much this could shorten or lengthen the wait. + +Instead, compute the calendar time at which the program should stop +waiting, and keep trying to wait until that calendar time. This won't +be off by more than a second. With just a little more work, you can use +@code{select} and make the waiting period quite accurate. 
(Of course, +heavy system load can cause additional unavoidable delays---unless the +machine is dedicated to one application, there is no way you can avoid +this.) + +On some systems, @code{sleep} can do strange things if your program uses +@code{SIGALRM} explicitly. Even if @code{SIGALRM} signals are being +ignored or blocked when @code{sleep} is called, @code{sleep} might +return prematurely on delivery of a @code{SIGALRM} signal. If you have +established a handler for @code{SIGALRM} signals and a @code{SIGALRM} +signal is delivered while the process is sleeping, the action taken +might be just to cause @code{sleep} to return instead of invoking your +handler. And, if @code{sleep} is interrupted by delivery of a signal +whose handler requests an alarm or alters the handling of @code{SIGALRM}, +this handler and @code{sleep} will interfere. + +On @gnusystems{}, it is safe to use @code{sleep} and @code{SIGALRM} in +the same program, because @code{sleep} does not work by means of +@code{SIGALRM}. + +@comment time.h +@comment POSIX.1 +@deftypefun int nanosleep (const struct timespec *@var{requested_time}, struct timespec *@var{remaining}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c On Linux, it's a syscall. On Mach, it calls gettimeofday and uses +@c ports. +If resolution to seconds is not enough the @code{nanosleep} function can +be used. As the name suggests the sleep interval can be specified in +nanoseconds. The actual elapsed time of the sleep interval might be +longer since the system rounds the elapsed time you request up to the +next integer multiple of the actual resolution the system can deliver. + +@code{*@var{requested_time}} is the elapsed time of the interval you want to +sleep. + +The function returns as @code{*@var{remaining}} the elapsed time left in the +interval for which you requested to sleep. If the interval completed +without getting interrupted by a signal, this is zero. + +@code{struct timespec} is described in @ref{Elapsed Time}.
+If the function returns because the interval is over the return value is +zero. If the function returns @math{-1} the global variable @code{errno} +is set to one of the following values: + +@table @code +@item EINTR +The call was interrupted because a signal was delivered to the thread. +If the @var{remaining} parameter is not the null pointer the structure +pointed to by @var{remaining} is updated to contain the remaining +elapsed time. + +@item EINVAL +The nanosecond value in the @var{requested_time} parameter contains an +illegal value. Either the value is negative or greater than or equal to +1000 million. +@end table + +This function is a cancellation point in multi-threaded programs. This +is a problem if the thread allocates some resources (like memory, file +descriptors, semaphores or whatever) at the time @code{nanosleep} is +called. If the thread gets canceled these resources stay allocated +until the program ends. To avoid this, calls to @code{nanosleep} should +be protected using cancellation handlers. +@c ref pthread_cleanup_push / pthread_cleanup_pop + +The @code{nanosleep} function is declared in @file{time.h}. +@end deftypefun diff --git a/REORG.TODO/manual/tsort.awk b/REORG.TODO/manual/tsort.awk new file mode 100644 index 0000000000..fd313dee64 --- /dev/null +++ b/REORG.TODO/manual/tsort.awk @@ -0,0 +1,46 @@ +#!/usr/bin/awk -f +# Generate topologically sorted list of manual chapters. +# Copyright (C) 1998-2017 Free Software Foundation, Inc. +# Written by Ulrich Drepper <drepper@cygnus.com>, 1998.
+# Bookkeeping counters: cnt is the number of entries in all[] (the
+# output list), dnt is the number of recorded (to, from) pairs.
+BEGIN {
+  cnt = 0
+  dnt = 0
+}
+# For every input line, record the pair ($1, $2) as an ordering
+# constraint and append $1 to the output list.
+{
+  to[dnt] = $1
+  from[dnt] = $2
+  ++dnt
+  all[cnt++] = $1
+}
+# Reorder all[] until, for every recorded pair, to[i] no longer appears
+# after from[i] in the list; then print the list, one entry per line.
+# NOTE(review): with cyclic constraints this loop looks like it would
+# keep moving entries forever -- confirm the input is always acyclic.
+END {
+  do {
+    moved = 0
+    for (i = 0; i < dnt; ++i) {
+      for (j = 0; j < cnt; ++j) {
+        if (all[j] == from[i]) {
+          # Look for to[i] somewhere after position j.
+          for (k = j + 1; k < cnt; ++k) {
+            if (all[k] == to[i]) {
+              break;
+            }
+          }
+          # k < cnt means the search above stopped on a match.
+          if (k < cnt) {
+            # Shift entries j..k-1 one slot to the right (overwriting
+            # the old copy of to[i] at k) and put to[i] at position j,
+            # i.e. directly in front of from[i].
+            for (l = k - 1; l >= j; --l) {
+              all[l + 1] = all[l]
+            }
+            all[j] = to[i]
+            break;
+          }
+        }
+      }
+      # j < cnt only when the j loop was left via the break above,
+      # i.e. an entry was moved; restart the scan from the first pair.
+      if (j < cnt) {
+        moved = 1
+        break
+      }
+    }
+  } while (moved)
+
+  for (i = 0; i < cnt; ++i) {
+    print all[i];
+  }
+}
diff --git a/REORG.TODO/manual/tunables.texi b/REORG.TODO/manual/tunables.texi new file mode 100644 index 0000000000..c9a4cb7fe5 --- /dev/null +++ b/REORG.TODO/manual/tunables.texi @@ -0,0 +1,215 @@ +@node Tunables +@c @node Tunables, , Internal Probes, Top +@c %MENU% Tunable switches to alter libc internal behavior +@chapter Tunables +@cindex tunables + +@dfn{Tunables} are a feature in @theglibc{} that allows application authors and +distribution maintainers to alter the runtime library behavior to match +their workload. These are implemented as a set of switches that may be +modified in different ways. The current default method to do this is via +the @env{GLIBC_TUNABLES} environment variable by setting it to a string +of colon-separated @var{name}=@var{value} pairs. For example, the following +example enables malloc checking and sets the malloc trim threshold to 128 +bytes: + +@example +GLIBC_TUNABLES=glibc.malloc.trim_threshold=128:glibc.malloc.check=3 +export GLIBC_TUNABLES +@end example + +Tunables are not part of the @glibcadj{} stable ABI, and they are +subject to change or removal across releases. Additionally, the method to +modify tunable values may change between releases and across distributions. +It is possible to implement multiple `frontends' for the tunables allowing +distributions to choose their preferred method at build time.
+ +Finally, the set of tunables available may vary between distributions as +the tunables feature allows distributions to add their own tunables under +their own namespace. + +@menu +* Tunable names:: The structure of a tunable name +* Memory Allocation Tunables:: Tunables in the memory allocation subsystem +* Hardware Capability Tunables:: Tunables that modify the hardware + capabilities seen by @theglibc{} +@end menu + +@node Tunable names +@section Tunable names +@cindex Tunable names +@cindex Tunable namespaces + +A tunable name is split into three components, a top namespace, a tunable +namespace and the tunable name. The top namespace for tunables implemented in +@theglibc{} is @code{glibc}. Distributions that choose to add custom tunables +in their maintained versions of @theglibc{} may choose to do so under their own +top namespace. + +The tunable namespace is a logical grouping of tunables in a single +module. This currently holds no special significance, although that may +change in the future. + +The tunable name is the actual name of the tunable. It is possible that +different tunable namespaces may have tunables within them that have the +same name, likewise for top namespaces. Hence, we only support +identification of tunables by their full name, i.e. with the top +namespace, tunable namespace and tunable name, separated by periods. + +@node Memory Allocation Tunables +@section Memory Allocation Tunables +@cindex memory allocation tunables +@cindex malloc tunables +@cindex tunables, malloc + +@deftp {Tunable namespace} glibc.malloc +Memory allocation behavior can be modified by setting any of the +following tunables in the @code{malloc} namespace: +@end deftp + +@deftp Tunable glibc.malloc.check +This tunable supersedes the @env{MALLOC_CHECK_} environment variable and is +identical in features. 
+Setting this tunable enables a special (less efficient) memory allocator for +the malloc family of functions that is designed to be tolerant against simple +errors such as double calls of free with the same argument, or overruns of a +single byte (off-by-one bugs). Not all such errors can be protected against, +however, and memory leaks can result. The following list describes the values +that this tunable can take and the effect they have on malloc functionality: + +@itemize @bullet +@item @code{0} Ignore all errors. The default allocator continues to be in +use, but all errors are silently ignored. +@item @code{1} Report errors. The alternate allocator is selected and heap +corruption, if detected, is reported as diagnostic messages to @code{stderr} +and the program continues execution. +@item @code{2} Abort on errors. The alternate allocator is selected and if +heap corruption is detected, the program is ended immediately by calling +@code{abort}. +@item @code{3} Fully enabled. The alternate allocator is selected and is fully +functional. That is, if heap corruption is detected, a verbose diagnostic +message is printed to @code{stderr} and the program is ended by calling +@code{abort}. +@end itemize + +Like @env{MALLOC_CHECK_}, @code{glibc.malloc.check} has a problem in that it +diverges from normal program behavior by writing to @code{stderr}, which could +be exploited in SUID and SGID binaries. Therefore, @code{glibc.malloc.check} +is disabled by default for SUID and SGID binaries. This can be enabled again +by the system administrator by adding a file @file{/etc/suid-debug}; the +content of the file could be anything or even empty. +@end deftp + +@deftp Tunable glibc.malloc.top_pad +This tunable supersedes the @env{MALLOC_TOP_PAD_} environment variable and is +identical in features. + +This tunable determines the amount of extra memory in bytes to obtain from the +system when any of the arenas need to be extended.
It also specifies the +number of bytes to retain when shrinking any of the arenas. This provides the +necessary hysteresis in heap size such that excessive amounts of system calls +can be avoided. + +The default value of this tunable is @samp{0}. +@end deftp + +@deftp Tunable glibc.malloc.perturb +This tunable supersedes the @env{MALLOC_PERTURB_} environment variable and is +identical in features. + +If set to a non-zero value, memory blocks are initialized with values depending +on some low order bits of this tunable when they are allocated (except when +allocated by calloc) and freed. This can be used to debug the use of +uninitialized or freed heap memory. Note that this option does not guarantee +that the freed block will have any specific values. It only guarantees that the +content the block had before it was freed will be overwritten. + +The default value of this tunable is @samp{0}. +@end deftp + +@deftp Tunable glibc.malloc.mmap_threshold +This tunable supersedes the @env{MALLOC_MMAP_THRESHOLD_} environment variable +and is identical in features. + +When this tunable is set, all chunks larger than this value in bytes are +allocated outside the normal heap, using the @code{mmap} system call. This way +it is guaranteed that the memory for these chunks can be returned to the system +on @code{free}. Note that requests smaller than this threshold might still be +allocated via @code{mmap}. + +If this tunable is not set, the default value is set to @samp{131072} bytes and +the threshold is adjusted dynamically to suit the allocation patterns of the +program. If the tunable is set, the dynamic adjustment is disabled and the +value is set as static. +@end deftp + +@deftp Tunable glibc.malloc.trim_threshold +This tunable supersedes the @env{MALLOC_TRIM_THRESHOLD_} environment variable +and is identical in features. 
+ +The value of this tunable is the minimum size (in bytes) of the top-most, +releasable chunk in an arena that will trigger a system call in order to return +memory to the system from that arena. + +If this tunable is not set, the default value is set as 128 KB and the +threshold is adjusted dynamically to suit the allocation patterns of the +program. If the tunable is set, the dynamic adjustment is disabled and the +value is set as static. +@end deftp + +@deftp Tunable glibc.malloc.mmap_max +This tunable supersedes the @env{MALLOC_MMAP_MAX_} environment variable and is +identical in features. + +The value of this tunable is the maximum number of chunks to allocate with +@code{mmap}. Setting this to zero disables all use of @code{mmap}. + +The default value of this tunable is @samp{65536}. +@end deftp + +@deftp Tunable glibc.malloc.arena_test +This tunable supersedes the @env{MALLOC_ARENA_TEST} environment variable and is +identical in features. + +The @code{glibc.malloc.arena_test} tunable specifies the number of arenas that +can be created before the test on the limit to the number of arenas is +conducted. The value is ignored if @code{glibc.malloc.arena_max} is set. + +The default value of this tunable is 2 for 32-bit systems and 8 for 64-bit +systems. +@end deftp + +@deftp Tunable glibc.malloc.arena_max +This tunable supersedes the @env{MALLOC_ARENA_MAX} environment variable and is +identical in features. + +This tunable sets the number of arenas to use in a process regardless of the +number of cores in the system. + +The default value of this tunable is @code{0}, meaning that the limit on the +number of arenas is determined by the number of CPU cores online. For 32-bit +systems the limit is twice the number of cores online and on 64-bit systems, it +is 8 times the number of cores online.
+@end deftp + +@node Hardware Capability Tunables +@section Hardware Capability Tunables +@cindex hardware capability tunables +@cindex hwcap tunables +@cindex tunables, hwcap + +@deftp {Tunable namespace} glibc.tune +Behavior of @theglibc{} can be tuned to assume specific hardware capabilities +by setting the following tunables in the @code{tune} namespace: +@end deftp + +@deftp Tunable glibc.tune.hwcap_mask +This tunable supersedes the @env{LD_HWCAP_MASK} environment variable and is +identical in features. + +The @code{AT_HWCAP} key in the Auxiliary Vector specifies instruction set +extensions available in the processor at runtime for some architectures. The +@code{glibc.tune.hwcap_mask} tunable allows the user to mask out those +capabilities at runtime, thus disabling use of those extensions. +@end deftp diff --git a/REORG.TODO/manual/users.texi b/REORG.TODO/manual/users.texi new file mode 100644 index 0000000000..47e28febdc --- /dev/null +++ b/REORG.TODO/manual/users.texi @@ -0,0 +1,2837 @@ +@node Users and Groups, System Management, Name Service Switch, Top +@c %MENU% How users are identified and classified +@chapter Users and Groups + +Every user who can log in on the system is identified by a unique number +called the @dfn{user ID}. Each process has an effective user ID which +says which user's access permissions it has. + +Users are classified into @dfn{groups} for access control purposes. Each +process has one or more @dfn{group ID values} which say which groups the +process can use for access to files. + +The effective user and group IDs of a process collectively form its +@dfn{persona}. This determines which files the process can access. +Normally, a process inherits its persona from the parent process, but +under special circumstances a process can change its persona and thus +change its access permissions. + +Each file in the system also has a user ID and a group ID.
Access +control works by comparing the user and group IDs of the file with those +of the running process. + +The system keeps a database of all the registered users, and another +database of all the defined groups. There are library functions you +can use to examine these databases. + +@menu +* User and Group IDs:: Each user has a unique numeric ID; + likewise for groups. +* Process Persona:: The user IDs and group IDs of a process. +* Why Change Persona:: Why a program might need to change + its user and/or group IDs. +* How Change Persona:: Changing the user and group IDs. +* Reading Persona:: How to examine the user and group IDs. + +* Setting User ID:: Functions for setting the user ID. +* Setting Groups:: Functions for setting the group IDs. + +* Enable/Disable Setuid:: Turning setuid access on and off. +* Setuid Program Example:: The pertinent parts of one sample program. +* Tips for Setuid:: How to avoid granting unlimited access. + +* Who Logged In:: Getting the name of the user who logged in, + or of the real user ID of the current process. + +* User Accounting Database:: Keeping information about users and various + actions in databases. + +* User Database:: Functions and data structures for + accessing the user database. +* Group Database:: Functions and data structures for + accessing the group database. +* Database Example:: Example program showing the use of database + inquiry functions. +* Netgroup Database:: Functions for accessing the netgroup database. +@end menu + +@node User and Group IDs +@section User and Group IDs + +@cindex login name +@cindex user name +@cindex user ID +Each user account on a computer system is identified by a @dfn{user +name} (or @dfn{login name}) and @dfn{user ID}. Normally, each user name +has a unique user ID, but it is possible for several login names to have +the same user ID. The user names and corresponding user IDs are stored +in a data base which you can access as described in @ref{User Database}. 
+ +@cindex group name +@cindex group ID +Users are classified in @dfn{groups}. Each user name belongs to one +@dfn{default group} and may also belong to any number of +@dfn{supplementary groups}. Users who are members of the same group can +share resources (such as files) that are not accessible to users who are +not members of that group. Each group has a @dfn{group name} and +@dfn{group ID}. @xref{Group Database}, for how to find information +about a group ID or group name. + +@node Process Persona +@section The Persona of a Process +@cindex persona +@cindex effective user ID +@cindex effective group ID +@cindex supplementary group IDs + +@c When Hurd is more widely used, explain multiple effective user IDs +@c here. -zw +At any time, each process has an @dfn{effective user ID}, an @dfn{effective +group ID}, and a set of @dfn{supplementary group IDs}. These IDs +determine the privileges of the process. They are collectively +called the @dfn{persona} of the process, because they determine ``who it +is'' for purposes of access control. + +Your login shell starts out with a persona which consists of your user +ID, your default group ID, and your supplementary group IDs (if you are +in more than one group). In normal circumstances, all your other processes +inherit these values. + +@cindex real user ID +@cindex real group ID +A process also has a @dfn{real user ID} which identifies the user who +created the process, and a @dfn{real group ID} which identifies that +user's default group. These values do not play a role in access +control, so we do not consider them part of the persona. But they are +also important. + +Both the real and effective user ID can be changed during the lifetime +of a process. @xref{Why Change Persona}. + +For details on how a process's effective user ID and group IDs affect +its permission to access files, see @ref{Access Permission}.
+ +The effective user ID of a process also controls permissions for sending +signals using the @code{kill} function. @xref{Signaling Another +Process}. + +Finally, there are many operations which can only be performed by a +process whose effective user ID is zero. A process with this user ID is +a @dfn{privileged process}. Commonly the user name @code{root} is +associated with user ID 0, but there may be other user names with this +ID. +@c !!! should mention POSIX capabilities here. + +@node Why Change Persona +@section Why Change the Persona of a Process? + +The most obvious situation where it is necessary for a process to change +its user and/or group IDs is the @code{login} program. When +@code{login} starts running, its user ID is @code{root}. Its job is to +start a shell whose user and group IDs are those of the user who is +logging in. (To accomplish this fully, @code{login} must set the real +user and group IDs as well as its persona. But this is a special case.) + +The more common case of changing persona is when an ordinary user +program needs access to a resource that wouldn't ordinarily be +accessible to the user actually running it. + +For example, you may have a file that is controlled by your program but +that shouldn't be read or modified directly by other users, either +because it implements some kind of locking protocol, or because you want +to preserve the integrity or privacy of the information it contains. +This kind of restricted access can be implemented by having the program +change its effective user or group ID to match that of the resource. + +Thus, imagine a game program that saves scores in a file. The game +program itself needs to be able to update this file no matter who is +running it, but if users can write the file without going through the +game, they can give themselves any scores they like. Some people +consider this undesirable, or even reprehensible. 
It can be prevented +by creating a new user ID and login name (say, @code{games}) to own the +scores file, and make the file writable only by this user. Then, when +the game program wants to update this file, it can change its effective +user ID to be that for @code{games}. In effect, the program must +adopt the persona of @code{games} so it can write to the scores file. + +@node How Change Persona +@section How an Application Can Change Persona +@cindex @code{setuid} programs +@cindex saved set-user-ID +@cindex saved set-group-ID +@cindex @code{_POSIX_SAVED_IDS} + +The ability to change the persona of a process can be a source of +unintentional privacy violations, or even intentional abuse. Because of +the potential for problems, changing persona is restricted to special +circumstances. + +You can't arbitrarily set your user ID or group ID to anything you want; +only privileged processes can do that. Instead, the normal way for a +program to change its persona is that it has been set up in advance to +change to a particular user or group. This is the function of the setuid +and setgid bits of a file's access mode. @xref{Permission Bits}. + +When the setuid bit of an executable file is on, executing that file +gives the process a third user ID: the @dfn{file user ID}. This ID is +set to the owner ID of the file. The system then changes the effective +user ID to the file user ID. The real user ID remains as it was. +Likewise, if the setgid bit is on, the process is given a @dfn{file +group ID} equal to the group ID of the file, and its effective group ID +is changed to the file group ID. + +If a process has a file ID (user or group), then it can at any time +change its effective ID to its real ID and back to its file ID. +Programs use this feature to relinquish their special privileges except +when they actually need them. This makes it less likely that they can +be tricked into doing something inappropriate with their privileges. 
+ +@strong{Portability Note:} Older systems do not have file IDs. +To determine if a system has this feature, you can test the compiler +define @code{_POSIX_SAVED_IDS}. (In the POSIX standard, file IDs are +known as saved IDs.) + +@xref{File Attributes}, for a more general discussion of file modes and +accessibility. + +@node Reading Persona +@section Reading the Persona of a Process + +Here are detailed descriptions of the functions for reading the user and +group IDs of a process, both real and effective. To use these +facilities, you must include the header files @file{sys/types.h} and +@file{unistd.h}. +@pindex unistd.h +@pindex sys/types.h + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} uid_t +This is an integer data type used to represent user IDs. In +@theglibc{}, this is an alias for @code{unsigned int}. +@end deftp + +@comment sys/types.h +@comment POSIX.1 +@deftp {Data Type} gid_t +This is an integer data type used to represent group IDs. In +@theglibc{}, this is an alias for @code{unsigned int}. +@end deftp + +@comment unistd.h +@comment POSIX.1 +@deftypefun uid_t getuid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@c Atomic syscall, except on hurd, where it takes a lock within a hurd +@c critical section. +The @code{getuid} function returns the real user ID of the process. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun gid_t getgid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getgid} function returns the real group ID of the process. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun uid_t geteuid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{geteuid} function returns the effective user ID of the process. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun gid_t getegid (void) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getegid} function returns the effective group ID of the process. 
+@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int getgroups (int @var{count}, gid_t *@var{groups}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +The @code{getgroups} function is used to inquire about the supplementary +group IDs of the process. Up to @var{count} of these group IDs are +stored in the array @var{groups}; the return value from the function is +the number of group IDs actually stored. If @var{count} is smaller than +the total number of supplementary group IDs, then @code{getgroups} +returns a value of @code{-1} and @code{errno} is set to @code{EINVAL}. + +If @var{count} is zero, then @code{getgroups} just returns the total +number of supplementary group IDs. On systems that do not support +supplementary groups, this will always be zero. + +Here's how to use @code{getgroups} to read all the supplementary group +IDs: + +@smallexample +@group +gid_t * +read_all_groups (void) +@{ + int ngroups = getgroups (0, NULL); + gid_t *groups + = (gid_t *) xmalloc (ngroups * sizeof (gid_t)); + int val = getgroups (ngroups, groups); + if (val < 0) + @{ + free (groups); + return NULL; + @} + return groups; +@} +@end group +@end smallexample +@end deftypefun + +@node Setting User ID +@section Setting the User ID + +This section describes the functions for altering the user ID (real +and/or effective) of a process. To use these facilities, you must +include the header files @file{sys/types.h} and @file{unistd.h}. +@pindex unistd.h +@pindex sys/types.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun int seteuid (uid_t @var{neweuid}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c seteuid @asulock @aculock +@c INLINE_SETXID_SYSCALL @asulock @aculock +@c This may be just a unix syscall, or the ugliness below used by +@c nptl to propagate the syscall to all cloned processes used to +@c implement threads. 
+@c nptl_setxid @asulock @aculock +@c while holding the stack_alloc_lock, mark with SETXID_BITMASK all +@c threads that are not exiting, signal them until no thread remains +@c marked, clear the marks and run the syscall, then release the lock. +@c lll_lock @asulock @aculock +@c list_for_each ok +@c list_entry ok +@c setxid_mark_thread ok +@c if a thread is initializing, wait for it to be cloned. +@c mark it with SETXID_BITMASK if it's not exiting +@c setxid_signal_thread ok +@c if a thread is marked with SETXID_BITMASK, +@c send it the SIGSETXID signal +@c setxid_unmark_thread ok +@c clear SETXID_BITMASK and release the futex if SETXID_BITMASK is +@c set. +@c <syscall> ok +@c lll_unlock @aculock +@c +@c sighandler_setxid ok +@c issue the syscall, clear SETXID_BITMASK, release the futex, and +@c wake up the signaller loop if the counter reached zero. +This function sets the effective user ID of a process to @var{neweuid}, +provided that the process is allowed to change its effective user ID. A +privileged process (effective user ID zero) can change its effective +user ID to any legal value. An unprivileged process with a file user ID +can change its effective user ID to its real user ID or to its file user +ID. Otherwise, a process may not change its effective user ID at all. + +The @code{seteuid} function returns a value of @code{0} to indicate +successful completion, and a value of @code{-1} to indicate an error. +The following @code{errno} error conditions are defined for this +function: + +@table @code +@item EINVAL +The value of the @var{neweuid} argument is invalid. + +@item EPERM +The process may not change to the specified ID. +@end table + +Older systems (those without the @code{_POSIX_SAVED_IDS} feature) do not +have this function. 
+@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int setuid (uid_t @var{newuid}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c setuid @asulock @aculock +@c INLINE_SETXID_SYSCALL dup @asulock @aculock +If the calling process is privileged, this function sets both the real +and effective user IDs of the process to @var{newuid}. It also deletes +the file user ID of the process, if any. @var{newuid} may be any +legal value. (Once this has been done, there is no way to recover the +old effective user ID.) + +If the process is not privileged, and the system supports the +@code{_POSIX_SAVED_IDS} feature, then this function behaves like +@code{seteuid}. + +The return values and error conditions are the same as for @code{seteuid}. +@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun int setreuid (uid_t @var{ruid}, uid_t @var{euid}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c setreuid @asulock @aculock +@c INLINE_SETXID_SYSCALL dup @asulock @aculock +This function sets the real user ID of the process to @var{ruid} and the +effective user ID to @var{euid}. If @var{ruid} is @code{-1}, it means +not to change the real user ID; likewise if @var{euid} is @code{-1}, it +means not to change the effective user ID. + +The @code{setreuid} function exists for compatibility with 4.3 BSD Unix, +which does not support file IDs. You can use this function to swap the +effective and real user IDs of the process. (Privileged processes are +not limited to this particular usage.) If file IDs are supported, you +should use that feature instead of this function. @xref{Enable/Disable +Setuid}. + +The return value is @code{0} on success and @code{-1} on failure. +The following @code{errno} error conditions are defined for this +function: + +@table @code +@item EPERM +The process does not have the appropriate privileges; you do not +have permission to change to the specified ID. 
+@end table +@end deftypefun + +@node Setting Groups +@section Setting the Group IDs + +This section describes the functions for altering the group IDs (real +and effective) of a process. To use these facilities, you must include +the header files @file{sys/types.h} and @file{unistd.h}. +@pindex unistd.h +@pindex sys/types.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun int setegid (gid_t @var{newgid}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c setegid @asulock @aculock +@c INLINE_SETXID_SYSCALL dup @asulock @aculock +This function sets the effective group ID of the process to +@var{newgid}, provided that the process is allowed to change its group +ID. Just as with @code{seteuid}, if the process is privileged it may +change its effective group ID to any value; if it isn't, but it has a +file group ID, then it may change to its real group ID or file group ID; +otherwise it may not change its effective group ID. + +Note that a process is only privileged if its effective @emph{user} ID +is zero. The effective group ID only affects access permissions. + +The return values and error conditions for @code{setegid} are the same +as those for @code{seteuid}. + +This function is only present if @code{_POSIX_SAVED_IDS} is defined. +@end deftypefun + +@comment unistd.h +@comment POSIX.1 +@deftypefun int setgid (gid_t @var{newgid}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c setgid @asulock @aculock +@c INLINE_SETXID_SYSCALL dup @asulock @aculock +This function sets both the real and effective group ID of the process +to @var{newgid}, provided that the process is privileged. It also +deletes the file group ID, if any. + +If the process is not privileged, then @code{setgid} behaves like +@code{setegid}. + +The return values and error conditions for @code{setgid} are the same +as those for @code{seteuid}. 
+@end deftypefun + +@comment unistd.h +@comment BSD +@deftypefun int setregid (gid_t @var{rgid}, gid_t @var{egid}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c setregid @asulock @aculock +@c INLINE_SETXID_SYSCALL dup @asulock @aculock +This function sets the real group ID of the process to @var{rgid} and +the effective group ID to @var{egid}. If @var{rgid} is @code{-1}, it +means not to change the real group ID; likewise if @var{egid} is +@code{-1}, it means not to change the effective group ID. + +The @code{setregid} function is provided for compatibility with 4.3 BSD +Unix, which does not support file IDs. You can use this function to +swap the effective and real group IDs of the process. (Privileged +processes are not limited to this usage.) If file IDs are supported, +you should use that feature instead of using this function. +@xref{Enable/Disable Setuid}. + +The return values and error conditions for @code{setregid} are the same +as those for @code{setreuid}. +@end deftypefun + +@code{setuid} and @code{setgid} behave differently depending on whether +the effective user ID at the time is zero. If it is not zero, they +behave like @code{seteuid} and @code{setegid}. If it is, they change +both effective and real IDs and delete the file ID. To avoid confusion, +we recommend you always use @code{seteuid} and @code{setegid} except +when you know the effective user ID is zero and your intent is to change +the persona permanently. This case is rare---most of the programs that +need it, such as @code{login} and @code{su}, have already been written. + +Note that if your program is setuid to some user other than @code{root}, +there is no way to drop privileges permanently. + +The system also lets privileged processes change their supplementary +group IDs. To use @code{setgroups} or @code{initgroups}, your programs +should include the header file @file{grp.h}. 
+@pindex grp.h + +@comment grp.h +@comment BSD +@deftypefun int setgroups (size_t @var{count}, const gid_t *@var{groups}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +@c setgroups @asulock @aculock +@c INLINE_SETXID_SYSCALL dup @asulock @aculock +This function sets the process's supplementary group IDs. It can only +be called from privileged processes. The @var{count} argument specifies +the number of group IDs in the array @var{groups}. + +This function returns @code{0} if successful and @code{-1} on error. +The following @code{errno} error conditions are defined for this +function: + +@table @code +@item EPERM +The calling process is not privileged. +@end table +@end deftypefun + +@comment grp.h +@comment BSD +@deftypefun int initgroups (const char *@var{user}, gid_t @var{group}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @acsmem{} @acsfd{} @aculock{}}} +@c initgroups @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c sysconf(_SC_NGROUPS_MAX) dup @acsfd +@c MIN dup ok +@c malloc @ascuheap @acsmem +@c internal_getgrouplist @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getgrouplist @ascuheap @acsfd @acsmem +@c nscd_get_map_ref dup @ascuheap @acsfd @acsmem +@c nscd_cache_search dup ok +@c nscd_open_socket dup @acsfd +@c realloc dup @ascuheap @acsmem +@c readall dup ok +@c memcpy dup ok +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref dup @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c nss_database_lookup dup @mtslocale @ascuheap @asulock @acucorrupt @acsmem @acsfd @aculock +@c nss_lookup_function dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c compat_call @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c sysconf(_SC_GETGR_R_SIZE_MAX) ok +@c nss_lookup_function dup 
@ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *getgrent_fct @ascuplugin +@c *setgrent_fct @ascuplugin +@c *endgrent_fct @ascuplugin +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c *initgroups_dyn_fct @ascuplugin +@c nss_next_action dup ok +@c setgroups dup @asulock @aculock +@c free dup @ascuheap @acsmem +The @code{initgroups} function sets the process's supplementary group +IDs to be the normal default for the user name @var{user}. The group +@var{group} is automatically included. + +This function works by scanning the group database for all the groups +@var{user} belongs to. It then calls @code{setgroups} with the list it +has constructed. + +The return values and error conditions are the same as for +@code{setgroups}. +@end deftypefun + +If you are interested in the groups a particular user belongs to, but do +not want to change the process's supplementary group IDs, you can use +@code{getgrouplist}. To use @code{getgrouplist}, your programs should +include the header file @file{grp.h}. +@pindex grp.h + +@comment grp.h +@comment BSD +@deftypefun int getgrouplist (const char *@var{user}, gid_t @var{group}, gid_t *@var{groups}, int *@var{ngroups}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @acsmem{} @acsfd{} @aculock{}}} +@c getgrouplist @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c MAX dup ok +@c malloc dup @ascuheap @acsmem +@c internal_getgrouplist dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c memcpy dup ok +@c free dup @ascuheap @acsmem +The @code{getgrouplist} function scans the group database for all the +groups @var{user} belongs to. Up to *@var{ngroups} group IDs +corresponding to these groups are stored in the array @var{groups}; the +return value from the function is the number of group IDs actually +stored. 
If *@var{ngroups} is smaller than the total number of groups +found, then @code{getgrouplist} returns a value of @code{-1} and stores +the actual number of groups in *@var{ngroups}. The group @var{group} is +automatically included in the list of groups returned by +@code{getgrouplist}. + +Here's how to use @code{getgrouplist} to read all supplementary groups +for @var{user}: + +@smallexample +@group +gid_t * +supplementary_groups (char *user) +@{ + int ngroups = 16; + gid_t *groups + = (gid_t *) xmalloc (ngroups * sizeof (gid_t)); + struct passwd *pw = getpwnam (user); + + if (pw == NULL) + return NULL; + + if (getgrouplist (pw->pw_name, pw->pw_gid, groups, &ngroups) < 0) + @{ + groups = xrealloc (groups, ngroups * sizeof (gid_t)); + getgrouplist (pw->pw_name, pw->pw_gid, groups, &ngroups); + @} + return groups; +@} +@end group +@end smallexample +@end deftypefun + +@node Enable/Disable Setuid +@section Enabling and Disabling Setuid Access + +A typical setuid program does not need its special access all of the +time. It's a good idea to turn off this access when it isn't needed, +so it can't possibly give unintended access. + +If the system supports the @code{_POSIX_SAVED_IDS} feature, you can +accomplish this with @code{seteuid}. When the game program starts, its +real user ID is @code{jdoe}, its effective user ID is @code{games}, and +its saved user ID is also @code{games}. The program should record both +user ID values once at the beginning, like this: + +@smallexample +user_user_id = getuid (); +game_user_id = geteuid (); +@end smallexample + +Then it can turn off game file access with + +@smallexample +seteuid (user_user_id); +@end smallexample + +@noindent +and turn it on with + +@smallexample +seteuid (game_user_id); +@end smallexample + +@noindent +Throughout this process, the real user ID remains @code{jdoe} and the +file user ID remains @code{games}, so the program can always set its +effective user ID to either one.
+ +On other systems that don't support file user IDs, you can +turn setuid access on and off by using @code{setreuid} to swap the real +and effective user IDs of the process, as follows: + +@smallexample +setreuid (geteuid (), getuid ()); +@end smallexample + +@noindent +This special case is always allowed---it cannot fail. + +Why does this have the effect of toggling the setuid access? Suppose a +game program has just started, and its real user ID is @code{jdoe} while +its effective user ID is @code{games}. In this state, the game can +write the scores file. If it swaps the two uids, the real becomes +@code{games} and the effective becomes @code{jdoe}; now the program has +only @code{jdoe} access. Another swap brings @code{games} back to +the effective user ID and restores access to the scores file. + +In order to handle both kinds of systems, test for the saved user ID +feature with a preprocessor conditional, like this: + +@smallexample +#ifdef _POSIX_SAVED_IDS + seteuid (user_user_id); +#else + setreuid (geteuid (), getuid ()); +#endif +@end smallexample + +@node Setuid Program Example +@section Setuid Program Example + +Here's an example showing how to set up a program that changes its +effective user ID. + +This is part of a game program called @code{caber-toss} that manipulates +a file @file{scores} that should be writable only by the game program +itself. The program assumes that its executable file will be installed +with the setuid bit set and owned by the same user as the @file{scores} +file. Typically, a system administrator will set up an account like +@code{games} for this purpose. + +The executable file is given mode @code{4755}, so that doing an +@samp{ls -l} on it produces output like: + +@smallexample +-rwsr-xr-x 1 games 184422 Jul 30 15:17 caber-toss +@end smallexample + +@noindent +The setuid bit shows up in the file modes as the @samp{s}. 
+ +The scores file is given mode @code{644}, and doing an @samp{ls -l} on +it shows: + +@smallexample +-rw-r--r-- 1 games 0 Jul 31 15:33 scores +@end smallexample + +Here are the parts of the program that show how to set up the changed +user ID. This program is conditionalized so that it makes use of the +file IDs feature if it is supported, and otherwise uses @code{setreuid} +to swap the effective and real user IDs. + +@smallexample +#include <stdio.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdlib.h> + + +/* @r{Remember the effective and real UIDs.} */ + +static uid_t euid, ruid; + + +/* @r{Restore the effective UID to its original value.} */ + +void +do_setuid (void) +@{ + int status; + +#ifdef _POSIX_SAVED_IDS + status = seteuid (euid); +#else + status = setreuid (ruid, euid); +#endif + if (status < 0) @{ + fprintf (stderr, "Couldn't set uid.\n"); + exit (status); + @} +@} + + +@group +/* @r{Set the effective UID to the real UID.} */ + +void +undo_setuid (void) +@{ + int status; + +#ifdef _POSIX_SAVED_IDS + status = seteuid (ruid); +#else + status = setreuid (euid, ruid); +#endif + if (status < 0) @{ + fprintf (stderr, "Couldn't set uid.\n"); + exit (status); + @} +@} +@end group + +/* @r{Main program.} */ + +int +main (void) +@{ + /* @r{Remember the real and effective user IDs.} */ + ruid = getuid (); + euid = geteuid (); + undo_setuid (); + + /* @r{Do the game and record the score.} */ + @dots{} +@} +@end smallexample + +Notice how the first thing the @code{main} function does is to set the +effective user ID back to the real user ID. This is so that any other +file accesses that are performed while the user is playing the game use +the real user ID for determining permissions. 
Only when the program +needs to open the scores file does it switch back to the file user ID, +like this: + +@smallexample +/* @r{Record the score.} */ + +int +record_score (int score) +@{ + FILE *stream; + char *myname; + + /* @r{Open the scores file.} */ + do_setuid (); + stream = fopen (SCORES_FILE, "a"); + undo_setuid (); + +@group + /* @r{Write the score to the file.} */ + if (stream) + @{ + myname = cuserid (NULL); + if (score < 0) + fprintf (stream, "%10s: Couldn't lift the caber.\n", myname); + else + fprintf (stream, "%10s: %d feet.\n", myname, score); + fclose (stream); + return 0; + @} + else + return -1; +@} +@end group +@end smallexample + +@node Tips for Setuid +@section Tips for Writing Setuid Programs + +It is easy for setuid programs to give the user access that isn't +intended---in fact, if you want to avoid this, you need to be careful. +Here are some guidelines for preventing unintended access and +minimizing its consequences when it does occur: + +@itemize @bullet +@item +Don't have @code{setuid} programs with privileged user IDs such as +@code{root} unless it is absolutely necessary. If the resource is +specific to your particular program, it's better to define a new, +nonprivileged user ID or group ID just to manage that resource. +It's better if you can write your program to use a special group than a +special user. + +@item +Be cautious about using the @code{exec} functions in combination with +changing the effective user ID. Don't let users of your program execute +arbitrary programs under a changed user ID. Executing a shell is +especially bad news. Less obviously, the @code{execlp} and @code{execvp} +functions are a potential risk (since the program they execute depends +on the user's @code{PATH} environment variable). 
+ +If you must @code{exec} another program under a changed ID, specify an +absolute file name (@pxref{File Name Resolution}) for the executable, +and make sure that the protections on that executable and @emph{all} +containing directories are such that ordinary users cannot replace it +with some other program. + +You should also check the arguments passed to the program to make sure +they do not have unexpected effects. Likewise, you should examine the +environment variables. Decide which arguments and variables are safe, +and reject all others. + +You should never use @code{system} in a privileged program, because it +invokes a shell. + +@item +Only use the user ID controlling the resource in the part of the program +that actually uses that resource. When you're finished with it, restore +the effective user ID back to the actual user's user ID. +@xref{Enable/Disable Setuid}. + +@item +If the @code{setuid} part of your program needs to access other files +besides the controlled resource, it should verify that the real user +would ordinarily have permission to access those files. You can use the +@code{access} function (@pxref{Access Permission}) to check this; it +uses the real user and group IDs, rather than the effective IDs. +@end itemize + +@node Who Logged In +@section Identifying Who Logged In +@cindex login name, determining +@cindex user ID, determining + +You can use the functions listed in this section to determine the login +name of the user who is running a process, and the name of the user who +logged in the current session. See also the function @code{getuid} and +friends (@pxref{Reading Persona}). How this information is collected by +the system and how to control/add/remove information from the background +storage is described in @ref{User Accounting Database}. + +The @code{getlogin} function is declared in @file{unistd.h}, while +@code{cuserid} and @code{L_cuserid} are declared in @file{stdio.h}. 
+@pindex stdio.h +@pindex unistd.h + +@comment unistd.h +@comment POSIX.1 +@deftypefun {char *} getlogin (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:getlogin} @mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getlogin (linux) @mtasurace:getlogin @mtasurace:utent @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getlogin_r_loginuid dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getlogin_fd0 (unix) @mtasurace:getlogin @mtasurace:utent @mtascusig:ALRM @mtascutimer @ascuheap @asulock @aculock @acsfd @acsmem +@c uses static buffer name => @mtasurace:getlogin +@c ttyname_r dup @ascuheap @acsmem @acsfd +@c strncpy dup ok +@c setutent dup @mtasurace:utent @asulock @aculock @acsfd +@c getutline_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c endutent dup @mtasurace:utent @asulock @aculock +@c libc_lock_unlock dup ok +@c strlen dup ok +@c memcpy dup ok +@c +@c getlogin_r (linux) @mtasurace:utent @mtascusig:ALRM @mtascutimer @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getlogin_r_loginuid @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c open_not_cancel_2 dup @acsfd +@c read_not_cancel dup ok +@c close_not_cancel_no_status dup @acsfd +@c strtoul @mtslocale +@c getpwuid_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @asulock @aculock @acsfd @acsmem +@c strlen dup ok +@c memcpy dup ok +@c free dup @asulock @aculock @acsfd @acsmem +@c getlogin_r_fd0 (unix) @mtasurace:utent @mtascusig:ALRM @mtascutimer @ascuheap @asulock @aculock @acsmem @acsfd +@c ttyname_r dup @ascuheap @acsmem @acsfd +@c strncpy dup ok +@c libc_lock_lock dup 
@asulock @aculock +@c *libc_utmp_jump_table->setutent dup @mtasurace:utent @acsfd +@c *libc_utmp_jump_table->getutline_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer +@c *libc_utmp_jump_table->endutent dup @mtasurace:utent @asulock @aculock +@c libc_lock_unlock dup ok +@c strlen dup ok +@c memcpy dup ok +The @code{getlogin} function returns a pointer to a string containing the +name of the user logged in on the controlling terminal of the process, +or a null pointer if this information cannot be determined. The string +is statically allocated and might be overwritten on subsequent calls to +this function or to @code{cuserid}. +@end deftypefun + +@comment stdio.h +@comment POSIX.1 +@deftypefun {char *} cuserid (char *@var{string}) +@safety{@prelim{}@mtunsafe{@mtasurace{:cuserid/!string} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c cuserid @mtasurace:cuserid/!string @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c if string is NULL, cuserid will overwrite and return a static buffer +@c geteuid dup ok +@c getpwuid_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c strncpy dup ok +The @code{cuserid} function returns a pointer to a string containing a +user name associated with the effective ID of the process. If +@var{string} is not a null pointer, it should be an array that can hold +at least @code{L_cuserid} characters; the string is returned in this +array. Otherwise, a pointer to a string in a static area is returned. +This string is statically allocated and might be overwritten on +subsequent calls to this function or to @code{getlogin}. + +The use of this function is deprecated since it is marked to be +withdrawn in XPG4.2 and has already been removed from newer revisions of +POSIX.1. 
+@end deftypefun + +@comment stdio.h +@comment POSIX.1 +@deftypevr Macro int L_cuserid +An integer constant that indicates how long an array you might need to +store a user name. +@end deftypevr + +These functions let your program identify positively the user who is +running or the user who logged in this session. (These can differ when +setuid programs are involved; see @ref{Process Persona}.) The user cannot +do anything to fool these functions. + +For most purposes, it is more useful to use the environment variable +@code{LOGNAME} to find out who the user is. This is more flexible +precisely because the user can set @code{LOGNAME} arbitrarily. +@xref{Standard Environment}. + + +@node User Accounting Database +@section The User Accounting Database +@cindex user accounting database + +Most Unix-like operating systems keep track of logged in users by +maintaining a user accounting database. This user accounting database +stores for each terminal, who has logged on, at what time, the process +ID of the user's login shell, etc., etc., but also stores information +about the run level of the system, the time of the last system reboot, +and possibly more. + +The user accounting database typically lives in @file{/etc/utmp}, +@file{/var/adm/utmp} or @file{/var/run/utmp}. However, these files +should @strong{never} be accessed directly. For reading information +from and writing information to the user accounting database, the +functions described in this section should be used. + + +@menu +* Manipulating the Database:: Scanning and modifying the user + accounting database. +* XPG Functions:: A standardized way for doing the same thing. +* Logging In and Out:: Functions from BSD that modify the user + accounting database. +@end menu + +@node Manipulating the Database +@subsection Manipulating the User Accounting Database + +These functions and the corresponding data structures are declared in +the header file @file{utmp.h}. 
+@pindex utmp.h + +@comment utmp.h +@comment SVID +@deftp {Data Type} {struct exit_status} +The @code{exit_status} data structure is used to hold information about +the exit status of processes marked as @code{DEAD_PROCESS} in the user +accounting database. + +@table @code +@item short int e_termination +The termination status of the process. + +@item short int e_exit +The exit status of the process. +@end table +@end deftp + +@deftp {Data Type} {struct utmp} +The @code{utmp} data structure is used to hold information about entries +in the user accounting database. On @gnusystems{} it has the following +members: + +@table @code +@item short int ut_type +Specifies the type of login; one of @code{EMPTY}, @code{RUN_LVL}, +@code{BOOT_TIME}, @code{OLD_TIME}, @code{NEW_TIME}, @code{INIT_PROCESS}, +@code{LOGIN_PROCESS}, @code{USER_PROCESS}, @code{DEAD_PROCESS} or +@code{ACCOUNTING}. + +@item pid_t ut_pid +The process ID number of the login process. + +@item char ut_line[] +The device name of the tty (without @file{/dev/}). + +@item char ut_id[] +The inittab ID of the process. + +@item char ut_user[] +The user's login name. + +@item char ut_host[] +The name of the host from which the user logged in. + +@item struct exit_status ut_exit +The exit status of a process marked as @code{DEAD_PROCESS}. + +@item long ut_session +The Session ID, used for windowing. + +@item struct timeval ut_tv +Time the entry was made. For entries of type @code{OLD_TIME} this is +the time when the system clock changed, and for entries of type +@code{NEW_TIME} this is the time the system clock was set to. + +@item int32_t ut_addr_v6[4] +The Internet address of a remote host. +@end table +@end deftp + +The @code{ut_type}, @code{ut_pid}, @code{ut_id}, @code{ut_tv}, and +@code{ut_host} fields are not available on all systems. Portable +applications therefore should be prepared for these situations.
To help +do this the @file{utmp.h} header provides macros +@code{_HAVE_UT_TYPE}, @code{_HAVE_UT_PID}, @code{_HAVE_UT_ID}, +@code{_HAVE_UT_TV}, and @code{_HAVE_UT_HOST} if the respective field is +available. The programmer can handle the situations by using +@code{#ifdef} in the program code. + +The following macros are defined for use as values for the +@code{ut_type} member of the @code{utmp} structure. The values are +integer constants. + +@vtable @code +@comment utmp.h +@comment SVID +@item EMPTY +This macro is used to indicate that the entry contains no valid user +accounting information. + +@comment utmp.h +@comment SVID +@item RUN_LVL +This macro is used to identify the system's runlevel. + +@comment utmp.h +@comment SVID +@item BOOT_TIME +This macro is used to identify the time of system boot. + +@comment utmp.h +@comment SVID +@item OLD_TIME +This macro is used to identify the time when the system clock changed. + +@comment utmp.h +@comment SVID +@item NEW_TIME +This macro is used to identify the time after the system clock changed. + +@comment utmp.h +@comment SVID +@item INIT_PROCESS +This macro is used to identify a process spawned by the init process. + +@comment utmp.h +@comment SVID +@item LOGIN_PROCESS +This macro is used to identify the session leader of a logged in user. + +@comment utmp.h +@comment SVID +@item USER_PROCESS +This macro is used to identify a user process. + +@comment utmp.h +@comment SVID +@item DEAD_PROCESS +This macro is used to identify a terminated process. + +@comment utmp.h +@comment SVID +@item ACCOUNTING +??? +@end vtable + +The size of the @code{ut_line}, @code{ut_id}, @code{ut_user} and +@code{ut_host} arrays can be found using the @code{sizeof} operator. + +Many older systems have, instead of an @code{ut_tv} member, an +@code{ut_time} member, usually of type @code{time_t}, for representing +the time associated with the entry. 
Therefore, for backwards +compatibility only, @file{utmp.h} defines @code{ut_time} as an alias for +@code{ut_tv.tv_sec}. + +@comment utmp.h +@comment SVID +@deftypefun void setutent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c Besides the static variables in utmp_file.c, there's the jump_table. +@c They're both modified while holding a lock, but other threads may +@c cause the variables to be modified between calling this function and +@c others that rely on the internal state it sets up. + +@c setutent @mtasurace:utent @asulock @aculock @acsfd +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->setutent @mtasurace:utent @acsfd +@c setutent_unknown @mtasurace:utent @acsfd +@c *libc_utmp_file_functions.setutent = setutent_file @mtasurace:utent @acsfd +@c open_not_cancel_2 dup @acsfd +@c fcntl_not_cancel dup ok +@c close_not_cancel_no_status dup @acsfd +@c lseek64 dup ok +@c libc_lock_unlock dup ok +This function opens the user accounting database to begin scanning it. +You can then call @code{getutent}, @code{getutid} or @code{getutline} to +read entries and @code{pututline} to write entries. + +If the database is already open, it resets the input to the beginning of +the database. +@end deftypefun + +@comment utmp.h +@comment SVID +@deftypefun {struct utmp *} getutent (void) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasurace{:utent} @mtasurace{:utentbuf} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c The static buffer that holds results is allocated with malloc at +@c the first call; the test is not thread-safe, so multiple concurrent +@c calls could malloc multiple buffers. 
+ +@c getutent @mtuinit @mtasurace:utent @mtasurace:utentbuf @mtascusig:ALRM @mtascutimer @ascuheap @asulock @aculock @acsfd @acsmem +@c malloc @asulock @aculock @acsfd @acsmem +@c getutent_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +The @code{getutent} function reads the next entry from the user +accounting database. It returns a pointer to the entry, which is +statically allocated and may be overwritten by subsequent calls to +@code{getutent}. You must copy the contents of the structure if you +wish to save the information or you can use the @code{getutent_r} +function which stores the data in a user-provided buffer. + +A null pointer is returned in case no further entry is available. +@end deftypefun + +@comment utmp.h +@comment SVID +@deftypefun void endutent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c endutent @mtasurace:utent @asulock @aculock @acsfd +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->endutent @mtasurace:utent @acsfd +@c endutent_unknown ok +@c endutent_file @mtasurace:utent @acsfd +@c close_not_cancel_no_status dup @acsfd +@c libc_lock_unlock dup ok +This function closes the user accounting database. +@end deftypefun + +@comment utmp.h +@comment SVID +@deftypefun {struct utmp *} getutid (const struct utmp *@var{id}) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +@c Same caveats as getutline. +@c +@c getutid @mtuinit @mtasurace:utent @mtascusig:ALRM @mtascutimer @ascuheap @asulock @aculock @acsmem @acsfd +@c uses a static buffer malloced on the first call +@c malloc dup @ascuheap @acsmem +@c getutid_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +This function searches forward from the current point in the database +for an entry that matches @var{id}. 
If the @code{ut_type} member of the +@var{id} structure is one of @code{RUN_LVL}, @code{BOOT_TIME}, +@code{OLD_TIME} or @code{NEW_TIME} the entries match if the +@code{ut_type} members are identical. If the @code{ut_type} member of +the @var{id} structure is @code{INIT_PROCESS}, @code{LOGIN_PROCESS}, +@code{USER_PROCESS} or @code{DEAD_PROCESS}, the entries match if the +@code{ut_type} member of the entry read from the database is one of +these four, and the @code{ut_id} members match. However if the +@code{ut_id} member of either the @var{id} structure or the entry read +from the database is empty it checks if the @code{ut_line} members match +instead. If a matching entry is found, @code{getutid} returns a pointer +to the entry, which is statically allocated, and may be overwritten by a +subsequent call to @code{getutent}, @code{getutid} or @code{getutline}. +You must copy the contents of the structure if you wish to save the +information. + +A null pointer is returned in case the end of the database is reached +without a match. + +The @code{getutid} function may cache the last read entry. Therefore, +if you are using @code{getutid} to search for multiple occurrences, it +is necessary to zero out the static data after each call. Otherwise +@code{getutid} could just return a pointer to the same entry over and +over again. +@end deftypefun + +@comment utmp.h +@comment SVID +@deftypefun {struct utmp *} getutline (const struct utmp *@var{line}) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c The static buffer that holds results is allocated with malloc at +@c the first call; the test is not thread-safe, so multiple concurrent +@c calls could malloc multiple buffers. 
+ +@c getutline @mtuinit @mtasurace:utent @mtascusig:ALRM @mtascutimer @ascuheap @asulock @aculock @acsfd @acsmem +@c malloc @asulock @aculock @acsfd @acsmem +@c getutline_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +This function searches forward from the current point in the database +until it finds an entry whose @code{ut_type} value is +@code{LOGIN_PROCESS} or @code{USER_PROCESS}, and whose @code{ut_line} +member matches the @code{ut_line} member of the @var{line} structure. +If it finds such an entry, it returns a pointer to the entry which is +statically allocated, and may be overwritten by a subsequent call to +@code{getutent}, @code{getutid} or @code{getutline}. You must copy the +contents of the structure if you wish to save the information. + +A null pointer is returned in case the end of the database is reached +without a match. + +The @code{getutline} function may cache the last read entry. Therefore +if you are using @code{getutline} to search for multiple occurrences, it +is necessary to zero out the static data after each call. Otherwise +@code{getutline} could just return a pointer to the same entry over and +over again. 
+@end deftypefun + +@comment utmp.h +@comment SVID +@deftypefun {struct utmp *} pututline (const struct utmp *@var{utmp}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c pututline @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->pututline @mtasurace:utent @mtascusig:ALRM @mtascutimer @acsfd +@c pututline_unknown @mtasurace:utent @acsfd +@c setutent_unknown dup @mtasurace:utent @acsfd +@c pututline_file @mtascusig:ALRM @mtascutimer @acsfd +@c TRANSFORM_UTMP_FILE_NAME ok +@c strcmp dup ok +@c access dup ok +@c open_not_cancel_2 dup @acsfd +@c fcntl_not_cancel dup ok +@c close_not_cancel_no_status dup @acsfd +@c llseek dup ok +@c dup2 dup ok +@c utmp_equal dup ok +@c internal_getut_r dup @mtascusig:ALRM @mtascutimer +@c LOCK_FILE dup @mtascusig:ALRM @mtascutimer +@c LOCKING_FAILED dup ok +@c ftruncate64 dup ok +@c write_not_cancel dup ok +@c UNLOCK_FILE dup @mtascutimer +@c libc_lock_unlock dup @aculock +The @code{pututline} function inserts the entry @code{*@var{utmp}} at +the appropriate place in the user accounting database. If it finds that +it is not already at the correct place in the database, it uses +@code{getutid} to search for the position to insert the entry; however, +this will not modify the static structure returned by @code{getutent}, +@code{getutid} and @code{getutline}. If this search fails, the entry +is appended to the database. + +The @code{pututline} function returns a pointer to a copy of the entry +inserted in the user accounting database, or a null pointer if the entry +could not be added. The following @code{errno} error conditions are +defined for this function: + +@table @code +@item EPERM +The process does not have the appropriate privileges; you cannot modify +the user accounting database.
+@end table +@end deftypefun + +All the @code{get*} functions mentioned before store the information +they return in a static buffer. This can be a problem in multi-threaded +programs since the data returned for a request in one thread can be +overwritten by a call made in another thread. Therefore @theglibc{} +provides as extensions three more functions which return the data in a +user-provided buffer. + +@comment utmp.h +@comment GNU +@deftypefun int getutent_r (struct utmp *@var{buffer}, struct utmp **@var{result}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c getutent_r @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->getutent_r @mtasurace:utent @mtascusig:ALRM @mtascutimer @acsfd +@c getutent_r_unknown @mtasurace:utent @acsfd +@c setutent_unknown dup @mtasurace:utent @acsfd +@c getutent_r_file @mtasurace:utent @mtascusig:ALRM @mtascutimer +@c LOCK_FILE @mtascusig:ALRM @mtascutimer +@c alarm dup @mtascutimer +@c sigemptyset dup ok +@c sigaction dup ok +@c memset dup ok +@c fcntl_not_cancel dup ok +@c LOCKING_FAILED ok +@c read_not_cancel dup ok +@c UNLOCK_FILE @mtascutimer +@c fcntl_not_cancel dup ok +@c alarm dup @mtascutimer +@c sigaction dup ok +@c memcpy dup ok +@c libc_lock_unlock dup ok +The @code{getutent_r} function is equivalent to the @code{getutent} function. It +returns the next entry from the database. But instead of storing the +information in a static buffer it stores it in the buffer pointed to by +the parameter @var{buffer}. + +If the call was successful, the function returns @code{0} and the +pointer variable pointed to by the parameter @var{result} contains a +pointer to the buffer which contains the result (this is most probably +the same value as @var{buffer}). If something went wrong during the +execution of @code{getutent_r} the function returns @code{-1}.
+ +This function is a GNU extension. +@end deftypefun + +@comment utmp.h +@comment GNU +@deftypefun int getutid_r (const struct utmp *@var{id}, struct utmp *@var{buffer}, struct utmp **@var{result}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c getutid_r @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->getutid_r @mtasurace:utent @mtascusig:ALRM @mtascutimer @acsfd +@c getutid_r_unknown @mtasurace:utent @acsfd +@c setutent_unknown dup @mtasurace:utent @acsfd +@c getutid_r_file @mtascusig:ALRM @mtascutimer +@c internal_getut_r @mtascusig:ALRM @mtascutimer +@c LOCK_FILE dup @mtascusig:ALRM @mtascutimer +@c LOCKING_FAILED dup ok +@c read_not_cancel dup ok +@c utmp_equal ok +@c strncmp dup ok +@c UNLOCK_FILE dup @mtascutimer +@c memcpy dup ok +@c libc_lock_unlock dup @aculock +This function retrieves just like @code{getutid} the next entry matching +the information stored in @var{id}. But the result is stored in the +buffer pointed to by the parameter @var{buffer}. + +If successful the function returns @code{0} and the pointer variable +pointed to by the parameter @var{result} contains a pointer to the +buffer with the result (probably the same as @var{buffer}). If not +successful the function returns @code{-1}. + +This function is a GNU extension.
+@end deftypefun + +@comment utmp.h +@comment GNU +@deftypefun int getutline_r (const struct utmp *@var{line}, struct utmp *@var{buffer}, struct utmp **@var{result}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +@c getutline_r @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->getutline_r @mtasurace:utent @mtascusig:ALRM @mtascutimer @acsfd +@c getutline_r_unknown @mtasurace:utent @acsfd +@c setutent_unknown dup @mtasurace:utent @acsfd +@c getutline_r_file @mtasurace:utent @mtascusig:ALRM @mtascutimer +@c LOCK_FILE @mtascusig:ALRM @mtascutimer +@c alarm dup @mtascutimer +@c sigemptyset dup ok +@c sigaction dup ok +@c memset dup ok +@c fcntl_not_cancel dup ok +@c LOCKING_FAILED ok +@c read_not_cancel dup ok +@c strncmp dup ok +@c UNLOCK_FILE @mtascutimer +@c fcntl_not_cancel dup ok +@c alarm dup @mtascutimer +@c sigaction dup ok +@c memcpy dup ok +@c libc_lock_unlock dup ok +This function retrieves just like @code{getutline} the next entry +matching the information stored in @var{line}. But the result is stored +in the buffer pointed to by the parameter @var{buffer}. + +If successful the function returns @code{0} and the pointer variable +pointed to by the parameter @var{result} contains a pointer to the +buffer with the result (probably the same as @var{buffer}). If not +successful the function returns @code{-1}. + +This function is a GNU extension. +@end deftypefun + + +In addition to the user accounting database, most systems keep a number +of similar databases. For example most systems keep a log file with all +previous logins (usually in @file{/etc/wtmp} or @file{/var/log/wtmp}). + +For specifying which database to examine, the following function should +be used. 
+ +@comment utmp.h +@comment SVID +@deftypefun int utmpname (const char *@var{file}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +@c utmpname @mtasurace:utent @asulock @ascuheap @aculock @acsmem +@c libc_lock_lock dup @asulock @aculock +@c *libc_utmp_jump_table->endutent dup @mtasurace:utent +@c strcmp dup ok +@c free dup @ascuheap @acsmem +@c strdup dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +The @code{utmpname} function changes the name of the database to be +examined to @var{file}, and closes any previously opened database. By +default @code{getutent}, @code{getutid}, @code{getutline} and +@code{pututline} read from and write to the user accounting database. + +The following macros are defined for use as the @var{file} argument: + +@deftypevr Macro {char *} _PATH_UTMP +This macro is used to specify the user accounting database. +@end deftypevr + +@deftypevr Macro {char *} _PATH_WTMP +This macro is used to specify the user accounting log file. +@end deftypevr + +The @code{utmpname} function returns a value of @code{0} if the new name +was successfully stored, and a value of @code{-1} to indicate an error. +Note that @code{utmpname} does not try to open the database, and that +therefore the return value does not say anything about whether the +database can be successfully opened. 
+@end deftypefun + +Specially for maintaining log-like databases @theglibc{} provides +the following function: + +@comment utmp.h +@comment SVID +@deftypefun void updwtmp (const char *@var{wtmp_file}, const struct utmp *@var{utmp}) +@safety{@prelim{}@mtunsafe{@mtascusig{:ALRM} @mtascutimer{}}@asunsafe{}@acunsafe{@acsfd{}}} +@c updwtmp @mtascusig:ALRM @mtascutimer @acsfd +@c TRANSFORM_UTMP_FILE_NAME dup ok +@c *libc_utmp_file_functions->updwtmp = updwtmp_file @mtascusig:ALRM @mtascutimer @acsfd +@c open_not_cancel_2 dup @acsfd +@c LOCK_FILE dup @mtascusig:ALRM @mtascutimer +@c LOCKING_FAILED dup ok +@c lseek64 dup ok +@c ftruncate64 dup ok +@c write_not_cancel dup ok +@c UNLOCK_FILE dup @mtascutimer +@c close_not_cancel_no_status dup @acsfd +The @code{updwtmp} function appends the entry *@var{utmp} to the +database specified by @var{wtmp_file}. For possible values for the +@var{wtmp_file} argument see the @code{utmpname} function. +@end deftypefun + +@strong{Portability Note:} Although many operating systems provide a +subset of these functions, they are not standardized. There are often +subtle differences in the return types, and there are considerable +differences between the various definitions of @code{struct utmp}. When +programming for @theglibc{}, it is probably best to stick +with the functions described in this section. If however, you want your +program to be portable, consider using the XPG functions described in +@ref{XPG Functions}, or take a look at the BSD compatible functions in +@ref{Logging In and Out}. + + +@node XPG Functions +@subsection XPG User Accounting Database Functions + +These functions, described in the X/Open Portability Guide, are declared +in the header file @file{utmpx.h}. 
+@pindex utmpx.h + +@deftp {Data Type} {struct utmpx} +The @code{utmpx} data structure contains at least the following members: + +@table @code +@item short int ut_type +Specifies the type of login; one of @code{EMPTY}, @code{RUN_LVL}, +@code{BOOT_TIME}, @code{OLD_TIME}, @code{NEW_TIME}, @code{INIT_PROCESS}, +@code{LOGIN_PROCESS}, @code{USER_PROCESS} or @code{DEAD_PROCESS}. + +@item pid_t ut_pid +The process ID number of the login process. + +@item char ut_line[] +The device name of the tty (without @file{/dev/}). + +@item char ut_id[] +The inittab ID of the process. + +@item char ut_user[] +The user's login name. + +@item struct timeval ut_tv +Time the entry was made. For entries of type @code{OLD_TIME} this is +the time when the system clock changed, and for entries of type +@code{NEW_TIME} this is the time the system clock was set to. +@end table +In @theglibc{}, @code{struct utmpx} is identical to @code{struct +utmp} except for the fact that including @file{utmpx.h} does not make +visible the declaration of @code{struct exit_status}. +@end deftp + +The following macros are defined for use as values for the +@code{ut_type} member of the @code{utmpx} structure. The values are +integer constants and are, in @theglibc{}, identical to the +definitions in @file{utmp.h}. + +@vtable @code +@comment utmpx.h +@comment XPG4.2 +@item EMPTY +This macro is used to indicate that the entry contains no valid user +accounting information. + +@comment utmpx.h +@comment XPG4.2 +@item RUN_LVL +This macro is used to identify the system's runlevel. + +@comment utmpx.h +@comment XPG4.2 +@item BOOT_TIME +This macro is used to identify the time of system boot. + +@comment utmpx.h +@comment XPG4.2 +@item OLD_TIME +This macro is used to identify the time when the system clock changed. + +@comment utmpx.h +@comment XPG4.2 +@item NEW_TIME +This macro is used to identify the time after the system clock changed. 
+ +@comment utmpx.h +@comment XPG4.2 +@item INIT_PROCESS +This macro is used to identify a process spawned by the init process. + +@comment utmpx.h +@comment XPG4.2 +@item LOGIN_PROCESS +This macro is used to identify the session leader of a logged in user. + +@comment utmpx.h +@comment XPG4.2 +@item USER_PROCESS +This macro is used to identify a user process. + +@comment utmpx.h +@comment XPG4.2 +@item DEAD_PROCESS +This macro is used to identify a terminated process. +@end vtable + +The size of the @code{ut_line}, @code{ut_id} and @code{ut_user} arrays +can be found using the @code{sizeof} operator. + +@comment utmpx.h +@comment XPG4.2 +@deftypefun void setutxent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +This function is similar to @code{setutent}. In @theglibc{} it is +simply an alias for @code{setutent}. +@end deftypefun + +@comment utmpx.h +@comment XPG4.2 +@deftypefun {struct utmpx *} getutxent (void) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +The @code{getutxent} function is similar to @code{getutent}, but returns +a pointer to a @code{struct utmpx} instead of @code{struct utmp}. In +@theglibc{} it simply is an alias for @code{getutent}. +@end deftypefun + +@comment utmpx.h +@comment XPG4.2 +@deftypefun void endutxent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent}}@asunsafe{@asulock{}}@acunsafe{@aculock{}}} +This function is similar to @code{endutent}. In @theglibc{} it is +simply an alias for @code{endutent}. 
+@end deftypefun + +@comment utmpx.h +@comment XPG4.2 +@deftypefun {struct utmpx *} getutxid (const struct utmpx *@var{id}) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{} @acsfd{}}} +This function is similar to @code{getutid}, but uses @code{struct utmpx} +instead of @code{struct utmp}. In @theglibc{} it is simply an alias +for @code{getutid}. +@end deftypefun + +@comment utmpx.h +@comment XPG4.2 +@deftypefun {struct utmpx *} getutxline (const struct utmpx *@var{line}) +@safety{@prelim{}@mtunsafe{@mtuinit{} @mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +This function is similar to @code{getutline}, but uses @code{struct utmpx} +instead of @code{struct utmp}. In @theglibc{} it is simply an alias +for @code{getutline}. +@end deftypefun + +@comment utmpx.h +@comment XPG4.2 +@deftypefun {struct utmpx *} pututxline (const struct utmpx *@var{utmp}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{}}} +The @code{pututxline} function is functionally identical to +@code{pututline}, but uses @code{struct utmpx} instead of @code{struct +utmp}. In @theglibc{}, @code{pututxline} is simply an alias for +@code{pututline}. +@end deftypefun + +@comment utmpx.h +@comment XPG4.2 +@deftypefun int utmpxname (const char *@var{file}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsmem{}}} +The @code{utmpxname} function is functionally identical to +@code{utmpname}. In @theglibc{}, @code{utmpxname} is simply an +alias for @code{utmpname}. +@end deftypefun + +You can translate between a traditional @code{struct utmp} and an XPG +@code{struct utmpx} with the following functions. In @theglibc{}, +these functions are merely copies, since the two structures are +identical. 
+ +@comment utmp.h utmpx.h +@comment GNU +@deftypefun int getutmp (const struct utmpx *@var{utmpx}, struct utmp *@var{utmp}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{getutmp} copies the information, insofar as the structures are +compatible, from @var{utmpx} to @var{utmp}. +@end deftypefun + +@comment utmp.h utmpx.h +@comment GNU +@deftypefun int getutmpx (const struct utmp *@var{utmp}, struct utmpx *@var{utmpx}) +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}} +@code{getutmpx} copies the information, insofar as the structures are +compatible, from @var{utmp} to @var{utmpx}. +@end deftypefun + + +@node Logging In and Out +@subsection Logging In and Out + +These functions, derived from BSD, are available in the separate +@file{libutil} library, and declared in @file{utmp.h}. +@pindex utmp.h + +Note that the @code{ut_user} member of @code{struct utmp} is called +@code{ut_name} in BSD. Therefore, @code{ut_name} is defined as an alias +for @code{ut_user} in @file{utmp.h}. + +@comment utmp.h +@comment BSD +@deftypefun int login_tty (int @var{filedes}) +@safety{@prelim{}@mtunsafe{@mtasurace{:ttyname}}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c If this function is canceled, it may have succeeded in redirecting +@c only some of the standard streams to the newly opened terminal. +@c Should there be a safety annotation for this? +@c login_tty @mtasurace:ttyname @ascuheap @asulock @aculock @acsmem @acsfd +@c setsid dup ok +@c ioctl dup ok +@c ttyname dup @mtasurace:ttyname @ascuheap @asulock @aculock @acsmem @acsfd +@c close dup @acsfd +@c open dup @acsfd +@c dup2 dup ok +This function makes @var{filedes} the controlling terminal of the +current process, redirects standard input, standard output and +standard error output to this terminal, and closes @var{filedes}. + +This function returns @code{0} on successful completion, and @code{-1} +on error. 
+@end deftypefun + +@comment utmp.h +@comment BSD +@deftypefun void login (const struct utmp *@var{entry}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acucorrupt{} @acsfd{} @acsmem{}}} +@c login @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @ascuheap @aculock @acucorrupt @acsfd @acsmem +@c getpid dup ok +@c tty_name @ascuheap @acucorrupt @acsmem @acsfd +@c ttyname_r dup @ascuheap @acsmem @acsfd +@c memchr dup ok +@c realloc dup @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c strncmp dup ok +@c basename dup ok +@c strncpy dup ok +@c utmpname dup @mtasurace:utent @asulock @ascuheap @aculock @acsmem +@c setutent dup @mtasurace:utent @asulock @aculock @acsfd +@c pututline dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c endutent dup @mtasurace:utent @asulock @aculock +@c free dup @ascuheap @acsmem +@c updwtmp dup @mtascusig:ALRM @mtascutimer @acsfd +The @code{login} function inserts an entry into the user accounting +database. The @code{ut_line} member is set to the name of the terminal +on standard input. If standard input is not a terminal @code{login} +uses standard output or standard error output to determine the name of +the terminal. If @code{struct utmp} has a @code{ut_type} member, +@code{login} sets it to @code{USER_PROCESS}, and if there is an +@code{ut_pid} member, it will be set to the process ID of the current +process. The remaining entries are copied from @var{entry}. + +A copy of the entry is written to the user accounting log file. 
+@end deftypefun + +@comment utmp.h +@comment BSD +@deftypefun int logout (const char *@var{ut_line}) +@safety{@prelim{}@mtunsafe{@mtasurace{:utent} @mtascusig{:ALRM} @mtascutimer{}}@asunsafe{@asulock{} @ascuheap{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}} +@c logout @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @ascuheap @aculock @acsfd @acsmem +@c utmpname dup @mtasurace:utent @asulock @ascuheap @aculock @acsmem +@c setutent dup @mtasurace:utent @asulock @aculock @acsfd +@c strncpy dup ok +@c getutline_r dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c bzero dup ok +@c gettimeofday dup ok +@c time dup ok +@c pututline dup @mtasurace:utent @mtascusig:ALRM @mtascutimer @asulock @aculock @acsfd +@c endutent dup @mtasurace:utent @asulock @aculock +This function modifies the user accounting database to indicate that the +user on @var{ut_line} has logged out. + +The @code{logout} function returns @code{1} if the entry was successfully +written to the database, or @code{0} on error. +@end deftypefun + +@comment utmp.h +@comment BSD +@deftypefun void logwtmp (const char *@var{ut_line}, const char *@var{ut_name}, const char *@var{ut_host}) +@safety{@prelim{}@mtunsafe{@mtascusig{:ALRM} @mtascutimer{}}@asunsafe{}@acunsafe{@acsfd{}}} +@c logwtmp @mtascusig:ALRM @mtascutimer @acsfd +@c memset dup ok +@c getpid dup ok +@c strncpy dup ok +@c gettimeofday dup ok +@c time dup ok +@c updwtmp dup @mtascusig:ALRM @mtascutimer @acsfd +The @code{logwtmp} function appends an entry to the user accounting log +file, for the current time and the information provided in the +@var{ut_line}, @var{ut_name} and @var{ut_host} arguments. +@end deftypefun + +@strong{Portability Note:} The BSD @code{struct utmp} only has the +@code{ut_line}, @code{ut_name}, @code{ut_host} and @code{ut_time} +members. Older systems do not even have the @code{ut_host} member. 
+ + +@node User Database +@section User Database +@cindex user database +@cindex password database +@pindex /etc/passwd + +This section describes how to search and scan the database of registered +users. The database itself is kept in the file @file{/etc/passwd} on +most systems, but on some systems a special network server gives access +to it. + +@menu +* User Data Structure:: What each user record contains. +* Lookup User:: How to look for a particular user. +* Scanning All Users:: Scanning the list of all users, one by one. +* Writing a User Entry:: How a program can rewrite a user's record. +@end menu + +@node User Data Structure +@subsection The Data Structure that Describes a User + +The functions and data structures for accessing the system user database +are declared in the header file @file{pwd.h}. +@pindex pwd.h + +@comment pwd.h +@comment POSIX.1 +@deftp {Data Type} {struct passwd} +The @code{passwd} data structure is used to hold information about +entries in the system user data base. It has at least the following members: + +@table @code +@item char *pw_name +The user's login name. + +@item char *pw_passwd +The encrypted password string. + +@item uid_t pw_uid +The user ID number. + +@item gid_t pw_gid +The user's default group ID number. + +@item char *pw_gecos +A string typically containing the user's real name, and possibly other +information such as a phone number. + +@item char *pw_dir +The user's home directory, or initial working directory. This might be +a null pointer, in which case the interpretation is system-dependent. + +@item char *pw_shell +The user's default shell, or the initial program run when the user logs in. +This might be a null pointer, indicating that the system default should +be used. 
+@end table +@end deftp + +@node Lookup User +@subsection Looking Up One User +@cindex converting user ID to user name +@cindex converting user name to user ID + +You can search the system user database for information about a +specific user using @code{getpwuid} or @code{getpwnam}. These +functions are declared in @file{pwd.h}. + +@comment pwd.h +@comment POSIX.1 +@deftypefun {struct passwd *} getpwuid (uid_t @var{uid}) +@safety{@prelim{}@mtunsafe{@mtasurace{:pwuid} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getpwuid @mtasurace:pwuid @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getpwuid_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +This function returns a pointer to a statically-allocated structure +containing information about the user whose user ID is @var{uid}. This +structure may be overwritten on subsequent calls to @code{getpwuid}. + +A null pointer value indicates there is no user in the data base with +user ID @var{uid}. 
+@end deftypefun + +@comment pwd.h +@comment POSIX.1c +@deftypefun int getpwuid_r (uid_t @var{uid}, struct passwd *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct passwd **@var{result}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getpwuid_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getpwuid_r @ascuheap @acsfd @acsmem +@c itoa_word dup ok +@c nscd_getpw_r @ascuheap @acsfd @acsmem +@c nscd_get_map_ref @ascuheap @acsfd @acsmem +@c nscd_acquire_maplock ok +@c nscd_get_mapping @ascuheap @acsfd @acsmem +@c open_socket dup @acsfd +@c memset dup ok +@c wait_on_socket dup ok +@c recvmsg dup ok +@c strcmp dup ok +@c fstat64 dup ok +@c mmap dup @acsmem +@c munmap dup @acsmem +@c malloc dup @ascuheap @acsmem +@c close dup ok +@c nscd_unmap dup @ascuheap @acsmem +@c nscd_cache_search ok +@c nis_hash ok +@c memcmp dup ok +@c nscd_open_socket @acsfd +@c open_socket @acsfd +@c socket dup @acsfd +@c fcntl dup ok +@c strcpy dup ok +@c connect dup ok +@c send dup ok +@c gettimeofday dup ok +@c poll dup ok +@c close_not_cancel_no_status dup @acsfd +@c wait_on_socket dup ok +@c read dup ok +@c close_not_cancel_no_status dup @acsfd +@c readall ok +@c read dup ok +@c wait_on_socket ok +@c poll dup ok +@c gettimeofday dup ok +@c memcpy dup ok +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c nscd_unmap @ascuheap @acsmem +@c munmap dup ok +@c free dup @ascuheap @acsmem +@c nss_passwd_lookup2 @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_database_lookup @mtslocale @ascuheap @asulock @acucorrupt @acsmem @acsfd @aculock +@c libc_lock_lock @asulock @aculock +@c libc_lock_unlock @aculock +@c nss_parse_file @mtslocale @ascuheap @asulock @acucorrupt @acsmem @acsfd @aculock +@c fopen dup 
@ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking dup ok [no concurrent uses] +@c malloc dup @asulock @aculock @acsfd @acsmem +@c fclose dup @ascuheap @asulock @acsmem @acsfd @aculock +@c getline dup @ascuheap @aculock @acucorrupt @acsmem +@c strchrnul dup ok +@c nss_getline @mtslocale @ascuheap @acsmem +@c isspace @mtslocale^^ +@c strlen dup ok +@c malloc dup @asulock @aculock @acsfd @acsmem +@c memcpy dup ok +@c nss_parse_service_list dup @mtslocale^, @ascuheap @acsmem +@c feof_unlocked dup ok +@c free dup @asulock @aculock @acsfd @acsmem +@c strcmp dup ok +@c nss_parse_service_list @mtslocale^, @ascuheap @acsmem +@c isspace @mtslocale^^ +@c malloc dup @asulock @aculock @acsfd @acsmem +@c mempcpy dup ok +@c strncasecmp dup ok +@c free dup @asulock @aculock @acsfd @acsmem +@c malloc dup @asulock @aculock @acsfd @acsmem +@c nss_lookup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup_function @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c tsearch @ascuheap @acucorrupt @acsmem [no @mtsrace or @asucorrupt due to locking] +@c known_compare ok +@c strcmp dup ok +@c malloc dup @ascuheap @acsmem +@c tdelete @ascuheap @acucorrupt @acsmem [no @mtsrace or @asucorrupt due to locking] +@c free dup @ascuheap @acsmem +@c nss_load_library @ascudlopen @ascuplugin @ascuheap @asulock @aculock @acsfd @acsmem +@c nss_new_service @ascuheap @acsmem +@c strcmp dup ok +@c malloc dup @ascuheap @acsmem +@c strlen dup ok +@c stpcpy dup ok +@c libc_dlopen @ascudlopen @ascuheap @asulock @aculock @acsfd @acsmem +@c libc_dlsym dup @asulock @aculock @acsfd @acsmem +@c *ifct(*nscd_init_cb) @ascuplugin +@c stpcpy dup ok +@c libc_dlsym dup @asulock @aculock @acsfd @acsmem +@c libc_lock_unlock dup ok +@c nss_next_action ok +@c *fct.l -> _nss_*_getpwuid_r @ascuplugin +@c nss_next2 @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c 
nss_next_action dup ok +@c nss_lookup_function dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem + +@c _nss_files_getpwuid_r @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c internal_setent @ascuheap @asulock @aculock @acsmem @acsfd +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fileno dup ok +@c fcntl dup ok +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +@c rewind dup @aculock [stream guarded by non-recursive pwent lock] +@c internal_getent @mtslocale^ +@c fgets_unlocked dup ok [stream guarded by non-recursive pwent lock] +@c isspace dup @mtslocale^^ +@c _nss_files_parse_pwent = parse_line ok +@c strpbrk dup ok +@c internal_endent @ascuheap @asulock @aculock @acsmem @acsfd +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_unlock dup @aculock + +@c _nss_nis_getpwuid_r ... not fully reviewed (assumed) @asuinit @asulock @acucorrupt @aculock +@c yp_get_default_domain @asulock @aculock +@c libc_lock_lock dup @asulock @aculock +@c getdomainname dup ok +@c strcmp dup ok +@c libc_lock_unlock dup @aculock +@c snprintf dup @ascuheap @acsmem +@c yp_match +@c do_ypcall_tr(xdr_ypreq_key,xdr_ypresp_val) +@c do_ypcall(xdr_ypreq_key,xdr_ypresp_val) +@c libc_lock_lock @asulock @aculock +@c strcmp +@c yp_bind +@c ypclnt_call +@c clnt_call +@c clnt_perror +@c libc_lock_unlock @aculock +@c yp_unbind_locked +@c yp_unbind +@c strcmp dup ok +@c calloc dup @asulock @aculock @acsfd @acsmem +@c yp_bind_file +@c strlen dup ok +@c snprintf dup @ascuheap @acsmem +@c open dup @acsfd [cancelpt] +@c pread dup [cancelpt] +@c yp_bind_client_create +@c close dup @acsfd [cancelpt] +@c yp_bind_ypbindprog +@c clnttcp_create +@c clnt_destroy +@c clnt_call(xdr_domainname,xdr_ypbind_resp) +@c memset dup ok +@c yp_bind_client_create +@c free dup @asulock @aculock @acsfd @acsmem +@c calloc dup @asulock @aculock @acsfd @acsmem +@c free dup @asulock @aculock @acsfd @acsmem +@c 
ypprot_err +@c memcpy dup ok +@c xdr_free(xdr_ypresp_val) +@c xdr_ypresp_val +@c xdr_ypstat +@c xdr_enum +@c XDR_PUTLONG +@c *x_putlong +@c XDR_GETLONG +@c *x_getlong +@c xdr_long +@c XDR_PUTLONG dup +@c XDR_GETLONG dup +@c xdr_short +@c XDR_PUTLONG dup +@c XDR_GETLONG dup +@c xdr_valdat +@c xdr_bytes +@c xdr_u_int +@c XDR_PUTLONG dup +@c XDR_GETLONG dup +@c mem_alloc @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c xdr_opaque +@c XDR_GETBYTES +@c *x_getbytes +@c XDR_PUTBYTES +@c *x_putbytes +@c mem_free @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c yperr2nss ok +@c strchr dup ok +@c _nls_default_nss @asuinit @ascuheap @asulock @acucorrupt @acsmem @acsfd @aculock +@c init @asuinit^, @ascuheap @asulock @acucorrupt @acsmem @acsfd @aculock +@c fopen dup @ascuheap @asulock @acsmem @acsfd @aculock +@c fsetlocking ok [no concurrent uses] +@c feof_unlocked dup ok +@c getline dup @ascuheap @aculock @acucorrupt @acsmem +@c isspace dup @mtslocale^^ +@c strncmp dup ok +@c free dup @asulock @acsmem @acsfd @aculock +@c fclose dup @ascuheap @asulock @aculock @acsmem @acsfd +@c free dup @asulock @acsmem @acsfd @aculock +@c mempcpy dup ok +@c strncpy dup ok +@c isspace dup @mtslocale^^ +@c _nss_files_parse_pwent ok +This function is similar to @code{getpwuid} in that it returns +information about the user whose user ID is @var{uid}. However, it +fills the user supplied structure pointed to by @var{result_buf} with +the information instead of using a static buffer. The first +@var{buflen} bytes of the additional buffer pointed to by @var{buffer} +are used to contain additional information, normally strings which are +pointed to by the elements of the result structure. + +If a user with ID @var{uid} is found, the pointer returned in +@var{result} points to the record which contains the wanted data (i.e., +@var{result} contains the value @var{result_buf}). If no user is found +or if an error occurred, the pointer returned in @var{result} is a null +pointer. 
The function returns zero or an error code. If the buffer +@var{buffer} is too small to contain all the needed information, the +error code @code{ERANGE} is returned and @code{errno} is set to +@code{ERANGE}. +@end deftypefun + + +@comment pwd.h +@comment POSIX.1 +@deftypefun {struct passwd *} getpwnam (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:pwnam} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getpwnam @mtasurace:pwnam @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c getpwnam_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +This function returns a pointer to a statically-allocated structure +containing information about the user whose user name is @var{name}. +This structure may be overwritten on subsequent calls to +@code{getpwnam}. + +A null pointer return indicates there is no user named @var{name}. 
+@end deftypefun + +@comment pwd.h +@comment POSIX.1c +@deftypefun int getpwnam_r (const char *@var{name}, struct passwd *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct passwd **@var{result}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getpwnam_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getpwnam_r @ascuheap @asulock @aculock @acsfd @acsmem +@c strlen dup ok +@c nscd_getpw_r dup @ascuheap @asulock @aculock @acsfd @acsmem +@c nss_passwd_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c +@c _nss_files_getpwnam_r @mtslocale @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_lock dup @asulock @aculock +@c internal_setent dup @ascuheap @asulock @aculock @acsmem @acsfd +@c internal_getent dup @mtslocale^ +@c strcmp dup ok +@c internal_endent dup @ascuheap @asulock @aculock @acsmem @acsfd +@c libc_lock_unlock dup @aculock +@c +@c _nss_*_getpwnam_r (assumed) @asuinit @asulock @acucorrupt @aculock + +This function is similar to @code{getpwnam} in that it returns +information about the user whose user name is @var{name}. However, like +@code{getpwuid_r}, it fills the user supplied buffers in +@var{result_buf} and @var{buffer} with the information instead of using +a static buffer. + +The return values are the same as for @code{getpwuid_r}. +@end deftypefun + + +@node Scanning All Users +@subsection Scanning the List of All Users +@cindex scanning the user list + +This section explains how a program can read the list of all users in +the system, one user at a time. The functions described here are +declared in @file{pwd.h}. 
+ +You can use the @code{fgetpwent} function to read user entries from a +particular file. + +@comment pwd.h +@comment SVID +@deftypefun {struct passwd *} fgetpwent (FILE *@var{stream}) +@safety{@prelim{}@mtunsafe{@mtasurace{:fpwent}}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{}}} +@c fgetpwent @mtasurace:fpwent @asucorrupt @asulock @acucorrupt @aculock +@c fgetpos dup @asucorrupt @aculock @acucorrupt +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c fgetpwent_r dup @asucorrupt @acucorrupt @aculock +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c fsetpos dup @asucorrupt @aculock @acucorrupt +@c libc_lock_unlock dup @aculock +This function reads the next user entry from @var{stream} and returns a +pointer to the entry. The structure is statically allocated and is +rewritten on subsequent calls to @code{fgetpwent}. You must copy the +contents of the structure if you wish to save the information. + +The stream must correspond to a file in the same format as the standard +password database file. +@end deftypefun + +@comment pwd.h +@comment GNU +@deftypefun int fgetpwent_r (FILE *@var{stream}, struct passwd *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct passwd **@var{result}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +@c fgetpwent_r @asucorrupt @acucorrupt @aculock +@c flockfile dup @aculock +@c fgets_unlocked @asucorrupt @acucorrupt [no @mtsrace due to explicit locking] +@c feof_unlocked dup ok +@c funlockfile dup @aculock +@c isspace dup @mtslocale^^ +@c parse_line dup ok +This function is similar to @code{fgetpwent} in that it reads the next +user entry from @var{stream}. But the result is returned in the +structure pointed to by @var{result_buf}. 
The +first @var{buflen} bytes of the additional buffer pointed to by +@var{buffer} are used to contain additional information, normally +strings which are pointed to by the elements of the result structure. + +The stream must correspond to a file in the same format as the standard +password database file. + +If the function returns zero @var{result} points to the structure with +the wanted data (normally this is in @var{result_buf}). If errors +occurred the return value is nonzero and @var{result} contains a null +pointer. +@end deftypefun + +The way to scan all the entries in the user database is with +@code{setpwent}, @code{getpwent}, and @code{endpwent}. + +@comment pwd.h +@comment SVID, BSD +@deftypefun void setpwent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:pwent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c setpwent @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c nss_setent(nss_passwd_lookup2) @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c ** resolv's res_maybe_init not called here +@c setup(nss_passwd_lookup2) @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *lookup_fct = nss_passwd_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:pwent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock @aculock +This function initializes a stream which @code{getpwent} and +@code{getpwent_r} use to read the user database. 
+@end deftypefun + +@comment pwd.h +@comment POSIX.1 +@deftypefun {struct passwd *} getpwent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:pwent} @mtasurace{:pwentbuf} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getpwent @mtasurace:pwent @mtasurace:pwentbuf @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent(getpwent_r) @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c malloc dup @ascuheap @acsmem +@c *func = getpwent_r dup @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +The @code{getpwent} function reads the next entry from the stream +initialized by @code{setpwent}. It returns a pointer to the entry. The +structure is statically allocated and is rewritten on subsequent calls +to @code{getpwent}. You must copy the contents of the structure if you +wish to save the information. + +A null pointer is returned when no more entries are available. +@end deftypefun + +@comment pwd.h +@comment GNU +@deftypefun int getpwent_r (struct passwd *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct passwd **@var{result}) +@safety{@prelim{}@mtunsafe{@mtasurace{:pwent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c The static buffer here is not the result_buf, but rather the +@c variables that keep track of what nss backend we've last used, and +@c whatever internal state the nss backend uses to keep track of the +@c last read entry. 
+@c getpwent_r @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nss_getent_r(nss_passwd_lookup2) @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c setup(nss_passwd_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:pwent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *sfct.f @mtasurace:pwent @ascuplugin +@c libc_lock_unlock dup @aculock +This function is similar to @code{getpwent} in that it returns the next +entry from the stream initialized by @code{setpwent}. Like +@code{fgetpwent_r}, it uses the user-supplied buffers in +@var{result_buf} and @var{buffer} to return the information requested. + +The return values are the same as for @code{fgetpwent_r}. 
+ +@end deftypefun + +@comment pwd.h +@comment SVID, BSD +@deftypefun void endpwent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:pwent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endpwent @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock @asulock @aculock +@c nss_endent(nss_passwd_lookup2) @mtasurace:pwent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c ** resolv's res_maybe_init not called here +@c setup(nss_passwd_lookup2) dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @mtasurace:pwent @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_unlock @aculock +This function closes the internal stream used by @code{getpwent} or +@code{getpwent_r}. +@end deftypefun + +@node Writing a User Entry +@subsection Writing a User Entry + +@comment pwd.h +@comment SVID +@deftypefun int putpwent (const struct passwd *@var{p}, FILE *@var{stream}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}} +@c putpwent @mtslocale @asucorrupt @aculock @acucorrupt +@c fprintf dup @mtslocale @asucorrupt @aculock @acucorrupt [no @ascuheap @acsmem] +This function writes the user entry @code{*@var{p}} to the stream +@var{stream}, in the format used for the standard user database +file. The return value is zero on success and nonzero on failure. + +This function exists for compatibility with SVID. 
We recommend that you +avoid using it, because it makes sense only on the assumption that the +@code{struct passwd} structure has no members except the standard ones; +on a system which merges the traditional Unix data base with other +extended information about users, adding an entry using this function +would inevitably leave out much of the important information. +@c Then how are programmers to modify the password file? -zw + +The group and user ID fields are left empty if the group or user name +starts with a - or +. + +The function @code{putpwent} is declared in @file{pwd.h}. +@end deftypefun + +@node Group Database +@section Group Database +@cindex group database +@pindex /etc/group + +This section describes how to search and scan the database of +registered groups. The database itself is kept in the file +@file{/etc/group} on most systems, but on some systems a special network +service provides access to it. + +@menu +* Group Data Structure:: What each group record contains. +* Lookup Group:: How to look for a particular group. +* Scanning All Groups:: Scanning the list of all groups. +@end menu + +@node Group Data Structure +@subsection The Data Structure for a Group + +The functions and data structures for accessing the system group +database are declared in the header file @file{grp.h}. +@pindex grp.h + +@comment grp.h +@comment POSIX.1 +@deftp {Data Type} {struct group} +The @code{group} structure is used to hold information about an entry in +the system group database. It has at least the following members: + +@table @code +@item char *gr_name +The name of the group. + +@item gid_t gr_gid +The group ID of the group. + +@item char **gr_mem +A vector of pointers to the names of users in the group. Each user name +is a null-terminated string, and the vector itself is terminated by a +null pointer. 
+@end table +@end deftp + +@node Lookup Group +@subsection Looking Up One Group +@cindex converting group name to group ID +@cindex converting group ID to group name + +You can search the group database for information about a specific +group using @code{getgrgid} or @code{getgrnam}. These functions are +declared in @file{grp.h}. + +@comment grp.h +@comment POSIX.1 +@deftypefun {struct group *} getgrgid (gid_t @var{gid}) +@safety{@prelim{}@mtunsafe{@mtasurace{:grgid} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getgrgid =~ getpwuid dup @mtasurace:grgid @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getgrgid_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function returns a pointer to a statically-allocated structure +containing information about the group whose group ID is @var{gid}. +This structure may be overwritten by subsequent calls to +@code{getgrgid}. + +A null pointer indicates there is no group with ID @var{gid}. 
+@end deftypefun + +@comment grp.h +@comment POSIX.1c +@deftypefun int getgrgid_r (gid_t @var{gid}, struct group *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct group **@var{result}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getgrgid_r =~ getpwuid_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getgrgid_r @ascuheap @acsfd @acsmem +@c itoa_word dup ok +@c nscd_getgr_r @ascuheap @acsfd @acsmem +@c nscd_get_map_ref dup @ascuheap @acsfd @acsmem +@c nscd_cache_search dup ok +@c nscd_open_socket dup @acsfd +@c readvall ok +@c readv dup ok +@c memcpy dup ok +@c wait_on_socket dup ok +@c memcpy dup ok +@c readall dup ok +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref dup @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c nss_group_lookup2 =~ nss_passwd_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l -> _nss_*_getgrgid_r @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function is similar to @code{getgrgid} in that it returns +information about the group whose group ID is @var{gid}. However, it +fills the user supplied structure pointed to by @var{result_buf} with +the information instead of using a static buffer. The first +@var{buflen} bytes of the additional buffer pointed to by @var{buffer} +are used to contain additional information, normally strings which are +pointed to by the elements of the result structure. + +If a group with ID @var{gid} is found, the pointer returned in +@var{result} points to the record which contains the wanted data (i.e., +@var{result} contains the value @var{result_buf}). If no group is found +or if an error occurred, the pointer returned in @var{result} is a null +pointer. 
The function returns zero or an error code. If the buffer +@var{buffer} is too small to contain all the needed information, the +error code @code{ERANGE} is returned and @code{errno} is set to +@code{ERANGE}. +@end deftypefun + +@comment grp.h +@comment SVID, BSD +@deftypefun {struct group *} getgrnam (const char *@var{name}) +@safety{@prelim{}@mtunsafe{@mtasurace{:grnam} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getgrnam =~ getpwnam dup @mtasurace:grnam @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c getgrnam_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function returns a pointer to a statically-allocated structure +containing information about the group whose group name is @var{name}. +This structure may be overwritten by subsequent calls to +@code{getgrnam}. + +A null pointer indicates there is no group named @var{name}. 
+@end deftypefun + +@comment grp.h +@comment POSIX.1c +@deftypefun int getgrnam_r (const char *@var{name}, struct group *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct group **@var{result}) +@safety{@prelim{}@mtsafe{@mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getgrnam_r =~ getpwnam_r dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_getgrnam_r @ascuheap @asulock @aculock @acsfd @acsmem +@c strlen dup ok +@c nscd_getgr_r dup @ascuheap @asulock @aculock @acsfd @acsmem +@c nss_group_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.l @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function is similar to @code{getgrnam} in that it returns +information about the group whose group name is @var{name}. Like +@code{getgrgid_r}, it uses the user supplied buffers in +@var{result_buf} and @var{buffer}, not a static buffer. + +The return values are the same as for @code{getgrgid_r}. +@end deftypefun + +@node Scanning All Groups +@subsection Scanning the List of All Groups +@cindex scanning the group list + +This section explains how a program can read the list of all groups in +the system, one group at a time. The functions described here are +declared in @file{grp.h}. + +You can use the @code{fgetgrent} function to read group entries from a +particular file. 
+ +@comment grp.h +@comment SVID +@deftypefun {struct group *} fgetgrent (FILE *@var{stream}) +@safety{@prelim{}@mtunsafe{@mtasurace{:fgrent}}@asunsafe{@asucorrupt{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{}}} +@c fgetgrent @mtasurace:fgrent @asucorrupt @asulock @acucorrupt @aculock +@c fgetpos dup @asucorrupt @aculock @acucorrupt +@c libc_lock_lock dup @asulock @aculock +@c malloc dup @ascuheap @acsmem +@c fgetgrent_r dup @asucorrupt @acucorrupt @aculock +@c realloc dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c fsetpos dup @asucorrupt @aculock @acucorrupt +@c libc_lock_unlock dup @aculock +The @code{fgetgrent} function reads the next entry from @var{stream}. +It returns a pointer to the entry. The structure is statically +allocated and is overwritten on subsequent calls to @code{fgetgrent}. You +must copy the contents of the structure if you wish to save the +information. + +The stream must correspond to a file in the same format as the standard +group database file. +@end deftypefun + +@comment grp.h +@comment GNU +@deftypefun int fgetgrent_r (FILE *@var{stream}, struct group *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct group **@var{result}) +@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@acucorrupt{} @aculock{}}} +@c fgetgrent_r @asucorrupt @acucorrupt @aculock +@c flockfile dup @aculock +@c fgets_unlocked @asucorrupt @acucorrupt [no @mtsrace due to explicit locking] +@c feof_unlocked dup ok +@c funlockfile dup @aculock +@c isspace dup @mtslocale^^ +@c parse_line dup ok +This function is similar to @code{fgetgrent} in that it reads the next +group entry from @var{stream}. But the result is returned in the +structure pointed to by @var{result_buf}. The first @var{buflen} bytes +of the additional buffer pointed to by @var{buffer} are used to contain +additional information, normally strings which are pointed to by the +elements of the result structure. 
+ +This stream must correspond to a file in the same format as the standard +group database file. + +If the function returns zero @var{result} points to the structure with +the wanted data (normally this is in @var{result_buf}). If errors +occurred the return value is non-zero and @var{result} contains a null +pointer. +@end deftypefun + +The way to scan all the entries in the group database is with +@code{setgrent}, @code{getgrent}, and @code{endgrent}. + +@comment grp.h +@comment SVID, BSD +@deftypefun void setgrent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:grent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c setgrent =~ setpwent dup @mtasurace:grent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c ...*lookup_fct = nss_group_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function initializes a stream for reading from the group data base. +You use this stream by calling @code{getgrent} or @code{getgrent_r}. +@end deftypefun + +@comment grp.h +@comment SVID, BSD +@deftypefun {struct group *} getgrent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:grent} @mtasurace{:grentbuf} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getgrent =~ getpwent dup @mtasurace:grent @mtasurace:grentbuf @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *func = getgrent_r dup @mtasurace:grent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +The @code{getgrent} function reads the next entry from the stream +initialized by @code{setgrent}. It returns a pointer to the entry. The +structure is statically allocated and is overwritten on subsequent calls +to @code{getgrent}. 
You must copy the contents of the structure if you +wish to save the information. +@end deftypefun + +@comment grp.h +@comment GNU +@deftypefun int getgrent_r (struct group *@var{result_buf}, char *@var{buffer}, size_t @var{buflen}, struct group **@var{result}) +@safety{@prelim{}@mtunsafe{@mtasurace{:grent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getgrent_r =~ getpwent_r dup @mtasurace:grent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function is similar to @code{getgrent} in that it returns the next +entry from the stream initialized by @code{setgrent}. Like +@code{fgetgrent_r}, it places the result in user-supplied buffers +pointed to by @var{result_buf} and @var{buffer}. + +If the function returns zero @var{result} contains a pointer to the data +(normally equal to @var{result_buf}). If errors occurred the return +value is non-zero and @var{result} contains a null pointer. +@end deftypefun + +@comment grp.h +@comment SVID, BSD +@deftypefun void endgrent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:grent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endgrent =~ endpwent dup @mtasurace:grent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function closes the internal stream used by @code{getgrent} or +@code{getgrent_r}. +@end deftypefun + +@node Database Example +@section User and Group Database Example + +Here is an example program showing the use of the system database inquiry +functions. The program prints some information about the user running +the program. + +@smallexample +@include db.c.texi +@end smallexample + +Here is some output from this program: + +@smallexample +I am Throckmorton Snurd. +My login name is snurd. +My uid is 31093. +My home directory is /home/fsg/snurd. 
+My default shell is /bin/sh. +My default group is guest (12). +The members of this group are: + friedman + tami +@end smallexample + +@node Netgroup Database +@section Netgroup Database + +@menu +* Netgroup Data:: Data in the Netgroup database and where + it comes from. +* Lookup Netgroup:: How to look for a particular netgroup. +* Netgroup Membership:: How to test for netgroup membership. +@end menu + +@node Netgroup Data +@subsection Netgroup Data + +@cindex Netgroup +Sometimes it is useful to group users according to other criteria +(@pxref{Group Database}). E.g., it is useful to associate a certain +group of users with a certain machine. On the other hand grouping of +host names is not supported so far. + +In Sun Microsystems' SunOS appeared a new kind of database, the netgroup +database. It allows grouping hosts, users, and domains freely, giving +them individual names. To be more concrete, a netgroup is a list of triples +consisting of a host name, a user name, and a domain name where any of +the entries can be a wildcard entry matching all inputs. A last +possibility is that names of other netgroups can also be given in the +list specifying a netgroup. So one can construct arbitrary hierarchies +without loops. + +Sun's implementation allows netgroups only for the @code{nis} or +@code{nisplus} service, @pxref{Services in the NSS configuration}. The +implementation in @theglibc{} has no such restriction. An entry +in either of the input services must have the following form: + +@smallexample +@var{groupname} ( @var{groupname} | @code{(}@var{hostname}@code{,}@var{username}@code{,}@var{domainname}@code{)} )+ +@end smallexample + +Any of the fields in the triple can be empty which means anything +matches. While describing the functions we will see that the opposite +case is useful as well. I.e., there may be entries which will not +match any input. For entries like this, a name consisting of the single +character @code{-} shall be used. 
+ +@node Lookup Netgroup +@subsection Looking up one Netgroup + +The lookup functions for netgroups are a bit different than all other +system database handling functions. Since a single netgroup can contain +many entries a two-step process is needed. First a single netgroup is +selected and then one can iterate over all entries in this netgroup. +These functions are declared in @file{netdb.h}. + +@comment netdb.h +@comment BSD +@deftypefun int setnetgrent (const char *@var{netgroup}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netgrent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c setnetgrent @mtasurace:netgrent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c nscd_setnetgrent @ascuheap @acsfd @acsmem +@c __nscd_setnetgrent @ascuheap @acsfd @acsmem +@c strlen dup ok +@c nscd_get_map_ref dup @ascuheap @acsfd @acsmem +@c nscd_cache_search dup ok +@c nscd_open_socket dup @acsfd +@c malloc dup @ascuheap @acsmem +@c readall dup ok +@c free dup @ascuheap @acsmem +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref dup @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c internal_setnetgrent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c free_memory dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c internal_setnetgrent_reuse @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c endnetgrent_hook dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup_function dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *endfct @ascuplugin +@c (netgroup::)setup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_netgroup_lookup dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock 
@acucorrupt @aculock @acsfd @acsmem +@c nss_netgroup_lookup2 =~ nss_passwd_lookup2 dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct.f @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup_function dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *endfct @ascuplugin +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c memcpy dup ok +@c libc_lock_unlock dup @aculock +A call to this function initializes the internal state of the library to +allow following calls of @code{getnetgrent} to iterate over all entries +in the netgroup with name @var{netgroup}. + +When the call is successful (i.e., when a netgroup with this name exists) +the return value is @code{1}. When the return value is @code{0} no +netgroup of this name is known or some other error occurred. +@end deftypefun + +It is important to remember that there is only one single state for +iterating the netgroups. Even if the programmer uses the +@code{getnetgrent_r} function the result is not really reentrant since +only a single netgroup can be processed at a time. If the +program needs to process more than one netgroup simultaneously, the +programmer must protect this by using external locking. This problem was +introduced in the original netgroups implementation in SunOS and since +we must stay compatible it is not possible to change this. + +Some other functions also use the netgroups state. Currently these are +the @code{innetgr} function and parts of the implementation of the +@code{compat} service part of the NSS implementation. 
+ +@comment netdb.h +@comment BSD +@deftypefun int getnetgrent (char **@var{hostp}, char **@var{userp}, char **@var{domainp}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netgrent} @mtasurace{:netgrentbuf} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getnetgrent @mtasurace:netgrent @mtasurace:netgrentbuf @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c uses unsafely a static buffer allocated within a libc_once call +@c allocate (libc_once) @ascuheap @acsmem +@c malloc dup @ascuheap @acsmem +@c getnetgrent_r dup @mtasurace:netgrent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +This function returns the next unprocessed entry of the currently +selected netgroup. The string pointers, in which addresses are passed in +the arguments @var{hostp}, @var{userp}, and @var{domainp}, will contain +after a successful call pointers to appropriate strings. If the string +in the next entry is empty the pointer has the value @code{NULL}. +The returned string pointers are only valid if none of the netgroup +related functions are called. + +The return value is @code{1} if the next entry was successfully read. A +value of @code{0} means no further entries exist or internal errors occurred. 
+@end deftypefun + +@comment netdb.h +@comment GNU +@deftypefun int getnetgrent_r (char **@var{hostp}, char **@var{userp}, char **@var{domainp}, char *@var{buffer}, size_t @var{buflen}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netgrent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c getnetgrent_r @mtasurace:netgrent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c internal_getnetgrent_r @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nss_lookup_function dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *fct @ascuplugin +@c nscd_getnetgrent ok +@c rawmemchr dup ok +@c internal_setnetgrent_reuse dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c strcmp dup ok +@c malloc dup @ascuheap @acsmem +@c memcpy dup ok +@c libc_lock_unlock dup @aculock +This function is similar to @code{getnetgrent} with only one exception: +the strings the three string pointers @var{hostp}, @var{userp}, and +@var{domainp} point to, are placed in the buffer of @var{buflen} bytes +starting at @var{buffer}. This means the returned values are valid +even after other netgroup related functions are called. + +The return value is @code{1} if the next entry was successfully read and +the buffer contains enough room to place the strings in it. @code{0} is +returned in case no more entries are found, the buffer is too small, or +internal errors occurred. + +This function is a GNU extension. The original implementation in the +SunOS libc does not provide this function. 
+@end deftypefun + +@comment netdb.h +@comment BSD +@deftypefun void endnetgrent (void) +@safety{@prelim{}@mtunsafe{@mtasurace{:netgrent}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c endnetgrent @mtasurace:netgrent @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c libc_lock_lock dup @asulock @aculock +@c internal_endnetgrent @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c endnetgrent_hook dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c free_memory dup @ascuheap @acsmem +@c libc_lock_unlock dup @aculock +This function frees all buffers which were allocated to process the last +selected netgroup. As a result all string pointers returned by calls +to @code{getnetgrent} are invalid afterwards. +@end deftypefun + +@node Netgroup Membership +@subsection Testing for Netgroup Membership + +It is often not necessary to scan the whole netgroup since often the +only interesting question is whether a given entry is part of the +selected netgroup. + +@comment netdb.h +@comment BSD +@deftypefun int innetgr (const char *@var{netgroup}, const char *@var{host}, const char *@var{user}, const char *@var{domain}) +@safety{@prelim{}@mtunsafe{@mtasurace{:netgrent} @mtslocale{}}@asunsafe{@ascudlopen{} @ascuplugin{} @ascuheap{} @asulock{}}@acunsafe{@acucorrupt{} @aculock{} @acsfd{} @acsmem{}}} +@c This function does not use the static data structure that the +@c *netgrent* ones do, but since each nss must maintain internal state +@c to support iteration and concurrent iteration will interfere +@c destructively, we regard this internal state as a static buffer. +@c getnetgrent_r iteration in each nss backend. 
+@c innetgr @mtasurace:netgrent @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c nscd_innetgr @ascuheap @acsfd @acsmem +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c stpcpy dup ok +@c nscd_get_map_ref dup @ascuheap @acsfd @acsmem +@c nscd_cache_search dup ok +@c nscd_open_socket dup @acsfd +@c close_not_cancel_no_status dup @acsfd +@c nscd_drop_map_ref dup @ascuheap @acsmem +@c nscd_unmap dup @ascuheap @acsmem +@c free dup @ascuheap @acsmem +@c memset dup ok +@c (netgroup::)setup dup @mtslocale @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *setfct.f @ascuplugin +@c nss_lookup_function dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c *getfct @ascuplugin +@c strcmp dup ok +@c strlen dup ok +@c malloc dup @ascuheap @acsmem +@c memcpy dup ok +@c strcasecmp dup +@c *endfct @ascuplugin +@c nss_next2 dup @ascudlopen @ascuplugin @ascuheap @asulock @acucorrupt @aculock @acsfd @acsmem +@c free_memory dup @ascuheap @acsmem +This function tests whether the triple specified by the parameters +@var{host}, @var{user}, and @var{domain} is part of the netgroup +@var{netgroup}. Using this function has the advantage that + +@enumerate +@item +no other netgroup function can use the global netgroup state since +internal locking is used and +@item +the function is implemented more efficiently than successive calls +to the other @code{set}/@code{get}/@code{endnetgrent} functions. +@end enumerate + +Any of the pointers @var{host}, @var{user}, or @var{domain} can be +@code{NULL} which means any value is accepted in this position. This is +also true for the name @code{-} which should not match any other string +otherwise. + +The return value is @code{1} if an entry matching the given triple is +found in the netgroup. The return value is @code{0} if the netgroup +itself is not found, the netgroup does not contain the triple or +internal errors occurred. 
+@end deftypefun + +@c FIXME these are undocumented: +@c setresgid +@c setresuid diff --git a/REORG.TODO/manual/xtract-typefun.awk b/REORG.TODO/manual/xtract-typefun.awk new file mode 100644 index 0000000000..57e567f9ba --- /dev/null +++ b/REORG.TODO/manual/xtract-typefun.awk @@ -0,0 +1,43 @@ +#!/usr/local/bin/gawk -f +BEGIN { + last_node=""; +} + +/^@node/ { + name = $0; + sub(/^@node +/, "", name); + sub(/[@,].*$/, "", name); + last_node = name; +} + +/^@deftype(fn|vr)/ { +# The name we want is nominally $4, but a brace block such as +# {struct foo *} is split across several awk fields, so each extra +# field inside braces shifts the name one position to the right. + id = 4; check = 2; squig = 0; + while(check < id) + { + if($check ~ /{/) squig++; + if($check ~ /}/) squig--; + if(squig) id++; + check++; + } + + gsub(/[(){}*]/, "", $id); + printf ("* %s: (libc)%s.\n", $id, last_node); +} + +/^@deftypefun/ { +# Likewise for @deftypefun, where the name is nominally $3. + id = 3; check = 2; squig = 0; + while(check < id) + { + if($check ~ /{/) squig++; + if($check ~ /}/) squig--; + if(squig) id++; + check++; + } + + gsub(/[(){}*]/, "", $id); + printf ("* %s: (libc)%s.\n", $id, last_node); +} |