(patch for Bash) regex(3) splitting/matching

From: William Park (opengeometry_at_yahoo.ca)
Date: 01/03/04


Date: 3 Jan 2004 03:51:52 GMT


(Hmm... my first post seems to have been cut short...)

Often, I needed to do something like

    - Return all '[a-z]+' patterns in a string, ie.
        'abc123xyz456' --> ['abc', 'xyz']
    
    - Split a string on '[a-z]+', ie.
        'abc123xyz456' --> ['123', '456']

I usually do this in Python. But, what I need is those string segments
in a Bash script, and it's a pain to toggle back and forth between Python
and Bash. So, included below is my patch (for Bash-2.05b) to do
regex(3) filtering on strings. Usage goes something like
    
    $ a=()
    $ array -E '[a-z]+' a 'abc123xyz456'
    $ echo ${a[*]}
    abc xyz

    $ a=()
    $ array -V '[a-z]+' a 'abc123xyz456'
    $ echo ${a[*]}
    123 456

'-E' option (like -e in egrep) searches for '[a-z]+', and appends all
matching substrings to array variable 'a'. Conversely, '-V' (like -v in
egrep) searches for '[a-z]+', and appends all non-matching substrings to
'a'.

'help array' will give you more info on other options for 'array'
command. Of course, my patch does more. But, those parts were previously
posted to Usenet, and the current post only has minor fixes.

-- 
William Park, Open Geometry Consulting, <opengeometry@yahoo.ca>
Linux solution for data management and processing. 
diff -r -u bash-2.05b/braces.c bash/braces.c
--- bash-2.05b/braces.c	2002-05-06 13:50:40.000000000 -0400
+++ bash/braces.c	2004-01-02 20:36:20.000000000 -0500
@@ -64,6 +64,10 @@
 static char **array_concat ();
 #endif
 
+
+#include "chartypes.h"		/* needed for ISLOWER() and ISUPPER() */
+
+
 /* Return an array of strings; the brace expansion of TEXT. */
 char **
 brace_expand (text)
@@ -161,22 +165,199 @@
       ADVANCE_CHAR (amble, alen, j);
     }
 
-  if (!amble[j])
-    {
-      free (amble);
-      free (preamble);
-      result[0] = savestring (text);
-      return (result);
-    }
+  if (!amble[j]) {
+      /* 
+       * Okay, found a standalone brace expression without ','.  If the amble
+       * contains 'a..b' expression, where 'a' and 'b' are positive integers,
+       * then replace it with 'a,a+1,...,b' (if a < b) or 'a,a-1,...,b' (if a >
+       * b), and give it back to shell for a normal expansion.  If 'a' or 'b'
+       * has leading '0', then zero pad the numbers.  The format size is the
+       * maximum size of 'a' or 'b'.  This is brace version of 'seq a b'.
+       *
+       * If 'a' or 'b' is a regular shell variable (not positional parameter or
+       * array element), then replace it with its value $a or $b.  If 'a' or 'b'
+       * starts with '!', then indirect substitution will be tried, similar to
+       * ${!a} or ${!b}.  In any case, if the final 'a..b' is pure number, then
+       * generate the usual integer sequence.  This is brace version of 'seq $a
+       * $b' or 'seq ${!a} ${!b}'.
+       *
+       * If 'a' or 'b' is '#', then replace it with value $# and generate
+       * integer sequence as usual.  If 'a' or 'b' is '*', then replace it with
+       * value $#, and generate parameter sequence by putting '${}' around the
+       * integers to indicate positional parameter.  However, expansion is done
+       * only if there are parameters (ie. $# >= 1).  If there is no parameter,
+       * then don't replace it.  This is brace version of 'seq a $#', 'seq $#
+       * b', and $*.
+       *
+       * If the expression is 'a--b', where 'a' and 'b' are strings of same
+       * size, then generate string sequence.  Characters must be both lowercase
+       * or both uppercase.  So, {a--c} is same as {a,b,c} and {A--C} is same as
+       * {A,B,C}, and {Aa--Bb} is same as {Aa,Ab,...,Az,Ba,Bb}.
+       * 
+       * Otherwise, return the original string back to shell as is, like before.
+       *
+       * --William Park <opengeometry@yahoo.ca>
+       */
+      char *a, *b, *t;
+      int dollarflag, zeropad, compareflag;
+      size_t i, end, n, size;
+      intmax_t x, y;
+
+      if (t = strstr (amble, "--")) {
+	  a = substring (amble, 0, t - amble);
+	  b = substring (amble, t - amble + 2, alen);
+	  if (strlen (a) == 0 || strlen (a) != strlen (b)) {
+	      free (a);
+	      free (b);
+	      goto Original_Code;
+	  }
+	  size = strlen (a);
+	  n = 1;
+	  for (i = 0; i < size; i++) {
+#if 0
+	      if (n == 1 && a[i] == b[i])	/* leading characters can be anything */
+		  continue;
+#endif
+	      if (! (ISLOWER (a[i]) && ISLOWER (b[i]) || ISUPPER (a[i]) && ISUPPER (b[i]))) {
+		  free (a);
+		  free (b);
+		  goto Original_Code;
+	      }
+	      if (a[i] != b[i] || n > 1) 
+		  if (n == 1)
+		      n = abs (b[i] - a[i]) + 1;	/* first position */
+		  else
+		      n *= 26;		/* max number: 26^{size} */
+	  }
+	  
+	  /* By this point, 'a' and 'b' are strings of equal size.
+	   */
+	  tack = strvec_create (n + 1);
+	  n = 0;
+	  do {
+	      tack[n++] = savestring (a);
+	      if ((compareflag = strcmp (a, b)) == 0) {
+		  tack[n] = (char *)NULL;
+		  break;
+	      }
+	      else if (compareflag < 0) {
+		  for (i = size - 1; i >= 1 && (a[i] == 'Z' || a[i] == 'z'); i--)
+		      a[i] -= 25;	/* back to 'A' or 'a' */
+		  ++a[i];
+	      }
+	      else if (compareflag > 0) {
+		  for (i = size - 1; i >= 1 && (a[i] == 'A' || a[i] == 'a'); i--)
+		      a[i] += 25;	/* back to 'Z' or 'z' */
+		  --a[i];
+	      }
+	  } while (1);
+      }
+      else if (t = strstr (amble, "..")) {
+	  a = substring (amble, 0, t - amble);
+	  b = substring (amble, t - amble + 2, alen);
+	  dollarflag = zeropad = 0;
+
+	  if (legal_identifier (a) && (t = get_string_value (a))) {
+	      free (a);
+	      a = savestring (t);
+	  }
+	  else if (*a == '!') {
+	      if (legal_identifier (a + 1) && (t = get_string_value (a + 1))) 
+		  if (legal_identifier (t) && (t = get_string_value (t))) {
+		      free (a);
+		      a = savestring (t);
+		  }
+	  }
+	  else if ((*a == '#' || *a == '*') && a[1] == '\0') {
+	      if (*a == '*')
+		  dollarflag = 1;
+	      if (n = number_of_args ()) {
+		  free (a);
+		  a = itos (n);
+	      }
+	  }
+
+	  if (legal_identifier (b) && (t = get_string_value (b))) {
+	      free (b);
+	      b = savestring (t);
+	  }
+	  else if (*b == '!') {
+	      if (legal_identifier (b + 1) && (t = get_string_value (b + 1))) 
+		  if (legal_identifier (t) && (t = get_string_value (t))) {
+		      free (b);
+		      b = savestring (t);
+		  }
+	  }
+	  else if ((*b == '#' || *b == '*') && b[1] == '\0') {
+	      if (*b == '*')
+		  dollarflag = 1;
+	      if (n = number_of_args ()) {
+		  free (b);
+		  b = itos (n);
+	      }
+	  }
+
+	  /* By this point, 'a' and 'b' must be all numbers.  If not, then exit
+	   * per original code.  Check for empty string explicitly, because
+	   * all_digits() returns 1 if string is empty (crazy!).
+	   */
+	  if (!(*a && all_digits (a) && legal_number (a, &x) && x >= 0
+		      && *b && all_digits (b) && legal_number (b, &y) && y >= 0)) {
+	      free (a);
+	      free (b);
+	      goto Original_Code;
+	  }
+
+	  i = x;
+	  end = y;
+	  n = abs (end - i) + 1;
+	  size = (strlen (a) > strlen (b)) ? strlen (a) : strlen (b);
+	  if (strlen (a) > 1 && *a == '0' || strlen (b) > 1 && *b == '0')
+	      zeropad = 1;
+
+	  tack = strvec_create (n + 1);
+	  n = 0;
+	  do {
+	      t = (char *)xmalloc (size + 3 + 1);	/* ${number} or number */
+	      if (dollarflag)
+		  sprintf (t, "${%d}", i);
+	      else if (zeropad)
+		  sprintf (t, "%0*d", size, i);
+	      else
+		  sprintf (t, "%d", i);
+	      tack[n++] = t;
+	      if (i == end) {
+		  tack[n] = (char *)NULL;
+		  break;
+	      }
+	      else if (i < end)
+		  ++i;
+	      else if (i > end)
+		  --i;
+	  } while (1);
+      }
+      else {
+Original_Code:
+	  free (amble);		/* original code */
+	  free (preamble);
+	  result[0] = savestring (text);
+	  return (result);
+      }
+
+      free (a);
+      free (b);
+      goto New_Tack;
+  }
 #endif /* SHELL */
 
-  postamble = &text[i + 1];
-
   tack = expand_amble (amble, alen);
+New_Tack:
   result = array_concat (result, tack);
   free (amble);
   strvec_dispose (tack);
 
+  postamble = &text[i + 1];
+
   tack = brace_expand (postamble);
   result = array_concat (result, tack);
   strvec_dispose (tack);
diff -r -u bash-2.05b/builtins/common.c bash/builtins/common.c
--- bash-2.05b/builtins/common.c	2002-06-28 12:24:31.000000000 -0400
+++ bash/builtins/common.c	2004-01-02 16:46:39.000000000 -0500
@@ -244,7 +244,7 @@
      char *s;
 {
   if (s)
-    builtin_error ("%s: no job control");
+    builtin_error ("%s: no job control", s);
   else
     builtin_error ("no job control");
 }
diff -r -u bash-2.05b/builtins/echo.def bash/builtins/echo.def
--- bash-2.05b/builtins/echo.def	2002-03-19 10:45:28.000000000 -0500
+++ bash/builtins/echo.def	2004-01-02 16:46:39.000000000 -0500
@@ -31,10 +31,12 @@
 #include <stdio.h>
 #include "../shell.h"
 
+#include "chartypes.h"		/* needed for ISXDIGIT() and HEXVALUE() */
+
 $BUILTIN echo
 $FUNCTION echo_builtin
 $DEPENDS_ON V9_ECHO
-$SHORT_DOC echo [-neE] [arg ...]
+$SHORT_DOC echo [-neEuU] [arg ...]
 Output the ARGs.  If -n is specified, the trailing newline is
 suppressed.  If the -e option is given, interpretation of the
 following backslash-escaped characters is turned on:
@@ -52,6 +54,11 @@
 
 You can explicitly turn off the interpretation of the above characters
 with the -E option.
+
+Option -u converts '%NN' 2-digit hexcode (used in URL) into 0xNN ASCII
+character.  To avoid confusion with '\xN' or '\xNN' hexcodes, this option is
+ignored if -e option is on.  Option -U encodes ASCII characters to '%NN'
+2-digit hexcode, which is inverse of -u option.
 $END
 
 $BUILTIN echo
@@ -62,7 +69,7 @@
 $END
 
 #if defined (V9_ECHO)
-#  define VALID_ECHO_OPTIONS "neE"
+#  define VALID_ECHO_OPTIONS "neEuU"
 #else /* !V9_ECHO */
 #  define VALID_ECHO_OPTIONS "n"
 #endif /* !V9_ECHO */
@@ -88,6 +95,8 @@
   int display_return, do_v9, i, len;
   char *temp, *s;
 
+  int decode_URL = 0;		/* convert '%NN' to 0xNN ASCII character */
+
   do_v9 = xpg_echo;
   display_return = 1;
 
@@ -124,6 +133,12 @@
 	    case 'E':
 	      do_v9 = 0;
 	      break;
+	    case 'u':
+		decode_URL = 1;
+		break;
+	    case 'U':
+		decode_URL = 2;
+		break;
 #endif /* V9_ECHO */
 	    default:
 	      goto just_echo;	/* XXX */
@@ -145,6 +160,32 @@
 	      for (s = temp; len > 0; len--)
 		putchar (*s++);
 	    }
+	  /*
+	   * Conversion between '%NN' hexcode to 0xNN ASCII character, only if
+	   * -e option is not enabled to avoid confusion.  '%NN' must be 2-digit
+	   * hex, whereas '\xNN' can be 1 or 2 digit hex.  Doing this in C is
+	   * much easier than shell function, because you need access to
+	   * internal binary number.
+	   *
+	   * --William Park <opengeometry@yahoo.ca>
+	   */
+	  else if (decode_URL == 1) {	/* -u: decode each '%NN' into one byte */
+	      for (s = temp; *s; s++)
+		  if (*s == '%' && ISXDIGIT (s[1]) && ISXDIGIT (s[2])) {
+		      putchar (HEXVALUE (s[1]) * 16 + HEXVALUE (s[2]));
+		      s += 2;	/* skip the two hex digits just consumed */
+		  } else 
+		      putchar (*s);
+	  }
+	  else if (decode_URL == 2) {	/* -U: encode every byte as '%NN' */
+	      char hexchar[] = "0123456789abcdef";
+
+	      for (s = temp; *s; s++) {
+		  putchar ('%');
+		  /* Go through unsigned char: with a signed plain char, bytes
+		     >= 0x80 are negative and '/ 16' truncates toward zero.  */
+		  putchar (hexchar[((unsigned char) *s >> 4) & 15]); putchar (hexchar[(unsigned char) *s & 15]);
+	  }	  }
 	  else	    
 	    printf ("%s", temp);
 #if defined (SunOS5)
diff -r -u bash-2.05b/builtins/eval.def bash/builtins/eval.def
--- bash-2.05b/builtins/eval.def	2002-04-08 13:21:04.000000000 -0400
+++ bash/builtins/eval.def	2004-01-02 20:52:38.000000000 -0500
@@ -51,3 +51,687 @@
   /* Note that parse_and_execute () frees the string it is passed. */
   return (list ? parse_and_execute (string_list (list), "eval", SEVAL_NOHIST) : EXECUTION_SUCCESS);
 }
+
+
+
+/* Emulate Python's map() function.
+ *
+ * --William Park <opengeometry@yahoo.ca>
+ */
+
+$BUILTIN arraymap
+$FUNCTION arraymap_builtin
+$SHORT_DOC arraymap command name [name ...]
+Mimicking Python's map() function, it runs 'command' for each element of
+arrays 'name', ... in parallel.  'command' should take as many positional
+parameters as there are arrays.  This is modified version of 'eval'
+builtins, and is equivalent to
+    command "${name[0]}" "${name[0]}" ...
+    command "${name[1]}" "${name[1]}" ...
+    ...
+    command "${name[N]}" "${name[N]}" ...
+where 'N' is the maximum of all indexes.  Array elements are referenced by
+index key, starting from 0 to N, not the order of storage.  So, there can
+be empty parameters.
+$END
+
+
+int
+arraymap_builtin (list)
+    WORD_LIST *list;
+{
+#if defined (ARRAY_VARS)
+    char *name, *command, *eval_string;
+    int i, n, size, eval_len;
+    SHELL_VAR *var;
+    WORD_LIST *t;
+
+    if (no_options (list))
+	return (EX_USAGE);
+    list = loptend;	/* skip over possible `--' */
+
+    if (list == 0) 		/* no arguments: nothing to do */
+	return (EXECUTION_SUCCESS);
+
+    command = list->word->word;		/* used verbatim; not validated here */
+
+    list = list->next;
+    if (list == 0) 		/* only 'command' given, no arrays: nothing to do */
+	return (EXECUTION_SUCCESS);
+
+    /* Pass 1: check every array name, find the largest index, size the buffer. */
+
+    n = 0;
+    size = strlen (command);
+    for (t = list; t != 0; t = t->next) {
+	name = t->word->word;
+	if (legal_identifier(name) == 0) {
+	    sh_invalidid (name);
+	    return (EXECUTION_FAILURE);
+	}
+
+	var = find_variable (name);
+	if (var == 0 || array_p (var) == 0) {
+	    sh_notfound (name);
+	    return (EXECUTION_FAILURE);
+	}
+
+	i = array_max_index (array_cell (var));
+	n = (n > i) ? n : i;		/* n = largest max index seen so far */
+
+	/* ' "${name[index]}"'  -->  name + index + 8 */
+	size += strlen (name) + INT_STRLEN_BOUND(intmax_t) + 8;
+    }
+
+    /* Pass 2: for each index i in 0..n build and run one command line,
+     *     command "${name1[i]}" "${name2[i]}" ...
+     * A fresh buffer is allocated per iteration because parse_and_execute ()
+     * frees the string it is given.  Stop at the first failing command. */
+    for (i = 0; i <= n; i++) {
+	eval_string = (char *)xmalloc (size + 1);
+
+	strcpy (eval_string, command);
+	eval_len = strlen (eval_string);
+
+	for (t = list; t != 0; t = t->next) {
+	    name = t->word->word;
+	    sprintf (eval_string + eval_len, " \"${%s[%d]}\"", name, i);
+	    eval_len = strlen (eval_string);
+	}
+
+	/* Note that parse_and_execute () frees the string it is passed. */
+	if (parse_and_execute (eval_string, "arraymap", SEVAL_NOHIST) != EXECUTION_SUCCESS) 
+	    return (EXECUTION_FAILURE);
+    }
+
+    return (EXECUTION_SUCCESS);
+#endif	/* ARRAY_VARS */
+}
+
+
+
+/* Emulate Python's filter() function.
+ *
+ * --William Park <opengeometry@yahoo.ca>
+ */
+
+$BUILTIN arrayfilter
+$FUNCTION arrayfilter_builtin
+$SHORT_DOC arrayfilter filter name
+Mimicking Python's filter() function, it runs 'filter' for each element of
+array 'name'.  It returns the array elements, for which 'filter' returns
+success (0).
+$END
+
+
+int
+arrayfilter_builtin (list)
+    WORD_LIST *list;
+{
+#if defined (ARRAY_VARS)
+    char *name, *filter, *eval_string;
+    int size;
+    SHELL_VAR *var;
+    ARRAY *a;
+    ARRAY_ELEMENT *ae;
+
+    if (no_options (list))
+	return (EX_USAGE);
+    list = loptend;	/* skip over possible `--' */
+
+    if (list == 0) 		/* no arguments: nothing to do */
+	return (EXECUTION_SUCCESS);
+
+    filter = list->word->word;		/* used verbatim; not validated here */
+
+    list = list->next;
+    if (list == 0) 		/* only 'filter' given, no array: nothing to do */
+	return (EXECUTION_SUCCESS);
+
+    name = list->word->word;	/* 2 arguments: arrayfilter filter name */
+    if (legal_identifier(name) == 0) {
+	sh_invalidid (name);
+	return (EXECUTION_FAILURE);
+    }
+    var = find_variable (name);
+    if (var == 0 || array_p (var) == 0) {
+	sh_notfound (name);
+	return (EXECUTION_FAILURE);
+    }
+
+    /* Run   filter "value"   once per element, in storage order, and print
+     * the value when the command succeeds.  NOTE: the value is pasted between
+     * double quotes and re-parsed, so '"', '$', '`' and '\' inside it are
+     * interpreted by the shell. */
+    a = array_cell (var);
+    if (a == 0 || array_empty (a)) return (EXECUTION_SUCCESS);	/* nothing to filter */
+
+    for (ae = element_forw (a->head); ae != a->head; ae = element_forw (ae)) {
+	size = strlen (filter) + strlen(element_value (ae)) + 3;	/* ' ', '"', '"' */
+
+	eval_string = (char *)xmalloc (size + 1);
+	sprintf (eval_string, "%s \"%s\"", filter, element_value (ae));
+
+	/* Note that parse_and_execute () frees the string it is passed. */
+	if (parse_and_execute (eval_string, "arrayfilter", SEVAL_NOHIST) == EXECUTION_SUCCESS)
+	    puts (element_value (ae));
+    }
+
+    return (EXECUTION_SUCCESS);
+#endif	/* ARRAY_VARS */
+}
+
+
+
+/* Add some of Python's list/dict functionalities.
+ *
+ * --William Park <opengeometry@yahoo.ca>
+ */
+
+#include <sys/types.h>		/* for regex(3) */
+#include <regex.h>		/* for regex(3) */
+
+$BUILTIN array
+$FUNCTION array_builtin
+$SHORT_DOC array [-kvlrsc] [-i value] [-j sep] [-EV regex] name [arg...]
+By default, print array indexes and values, separated by '\t', mimicking
+dict.items() in Python.  Only one option is allowed, so the last one takes
+effect.
+    -k          Print only indexes.  --> dict.keys()
+    -v          Print only values.  --> dict.values() or ${name[*]}
+    -l          Print size of each array element.  -->  [len(v) for v in list] or ${#name[*]}
+    -i value    Print all indexes that have 'value'.  --> list.index(value), repeat...
+    -j sep      Print all values with 'sep' separator.  --> sep.join()
+The following operation changes the array in-place.
+    -r          Reverse the array  --> list.reverse()
+    -s          Sort on array element's value  --> list.sort()
+    -c          Collapse the array, so that there is no missing index.
+
+If one or more arguments are present, then append them sequentially to the
+end of array, mimicking list.append(arg) or list.extend([arg,...]) in
+Python.  It doesn't create a new array, so create it manually.
+    -E regex	Extract regex(7) patterns from 'arg' strings, and append
+		each match to the end of array.  Unlike Python, null string
+		is not produced nor added.  (-e as in egrep)
+    -V regex	Remove regex(7) patterns from 'arg' strings, and append the
+		remaining segments to the end of array.  Unlike Python,
+		null string is not produced nor added.  (-v as in egrep)
+		--> list.extend(re.split(regex,arg)), repeat...
+$END
+
+
+/* Wrapper around inttostr() in ../lib/sh/itos.c, to convert the index of
+ * array element AE to a string.  One can use itos(), but it copies the string,
+ * which requires an extra step of freeing it.
+ */
+static char *
+element_index_to_string (ae)
+    ARRAY_ELEMENT *ae;
+{
+    /* 'static' so the buffer outlives this call; every call reuses the same
+     * buffer, so use or copy the result before calling again. */
+    static char indstr[INT_STRLEN_BOUND(intmax_t) + 1];
+    
+    return inttostr (element_index (ae), indstr, sizeof (indstr));
+}
+
+
+/* Copied from array_walk() in ../array.c.  For each array element, print its
+ * index key ('k'), value ('v'), value length ('l'), or index and value
+ * separated by '\t' (any other FLAG).  Similar to dict.keys(), dict.values(),
+ * and dict.items() in Python.  */
+static void
+print_elements (var, flag)
+    SHELL_VAR *var;
+    int flag;	/* 'k', 'v', 'l', or anything else (default) */
+{
+    ARRAY *a;
+    ARRAY_ELEMENT *ae;
+
+    a = array_cell (var);
+    if (a == 0 || array_empty (a)) return;	/* do nothing */
+
+    for (ae = element_forw (a->head); ae != a->head; ae = element_forw (ae))
+	switch (flag) {
+	case 'k':
+	    puts (element_index_to_string (ae));
+	    break;
+	case 'v':
+	    puts (element_value (ae));
+	    break;
+	case 'l':
+	    printf ("%d\n", (int) strlen (element_value (ae)));	/* strlen() is size_t; %d needs int */
+	    break;
+	default:
+	    printf ("%s\t%s\n", element_index_to_string (ae), element_value (ae));
+	    break;
+	}
+}
+
+
+/* Copied from array_walk() in ../array.c.  Print, one per line, the index key
+ * of every array element whose value is exactly VALUE (strcmp() equality).
+ * Similar to list.index(value) in Python, but reports every match.
+ */
+static void
+print_all_indexes_with_value (var, value)
+    SHELL_VAR *var;
+    char *value;
+{
+    ARRAY *a;
+    ARRAY_ELEMENT *ae;
+
+    a = array_cell (var);
+    if (a == 0 || array_empty (a)) return;	/* do nothing */
+
+    for (ae = element_forw (a->head); ae != a->head; ae = element_forw (ae))
+	if (strcmp (element_value (ae), value) == 0) 
+	    puts (element_index_to_string (ae));
+}
+
+
+/* Renumber the array in place so that its indexes run from 0 to n-1, where n
+ * is the number of elements, keeping the elements in their current order.
+ */
+static void
+array_collapse (var)
+    SHELL_VAR *var;
+{
+    ARRAY *a;
+    ARRAY_ELEMENT *ae;
+    arrayind_t i, n;
+
+    a = array_cell (var);
+    if (a == 0 || array_empty (a)) return;	/* do nothing */
+    n = array_num_elements (a);
+    ae = a->head;
+    for (i = 0; i < n; i++) {
+	ae = element_forw (ae);
+	element_index (ae) = i;
+    }
+    a->max_index = n - 1;	/* keep array_max_index () in step with the new keys */
+}
+
+
+/* Reverse the array order, by swapping the element values.  The index keys are
+ * unchanged.  Similar to list.reverse() in Python.
+ */
+static void
+array_reverse (var)
+    SHELL_VAR *var;
+{
+    ARRAY *a;
+    ARRAY_ELEMENT *ae, *be;
+    char *t;
+
+    a = array_cell (var);
+    if (a == 0 || array_empty (a)) return;	/* do nothing */
+
+    /* 'ae' walks forward from the first element and 'be' backward from the
+     * last; stop once their index keys meet or cross. */
+    for (ae = element_forw (a->head), be = element_back (a->head); 
+	 ae != a->head && be != a->head && element_index (ae) < element_index (be);
+	 ae = element_forw (ae), be = element_back(be))
+    {
+	t = element_value (ae);		/* swap the value pointers only */
+	element_value (ae) = element_value (be);
+	element_value (be) = t;
+    }
+}
+
+
+/* Sort the array values in place (index keys are unchanged): by atoi() value
+ * when integer_p (var) is true, otherwise by strcmp().  FLAG is unused.
+ * Similar to list.sort() in Python.  */
+static void
+array_sort (var, flag)
+    SHELL_VAR *var;
+    int flag;
+{
+    ARRAY *a;
+    ARRAY_ELEMENT *ae;
+    char **base;	/* array holding pointers to element values */
+    int n, i;
+
+    int my_strcmp (x, y) char **x, **y;	/* nested function: GNU C extension */
+    {
+	return strcmp (*x, *y);		/* qsort() needs the comparison result */
+    }
+    int my_intcmp (x, y) char **x, **y;
+    {
+	int i = atoi (*x);
+	int j = atoi (*y);
+	
+	if (i < j) return -1;
+	if (i > j) return 1;
+	return 0;
+    }
+
+    a = array_cell (var);
+    if (a == 0 || array_empty (a)) return;	/* do nothing */
+
+    n = array_num_elements (a);
+    base = (char **) xmalloc (n * sizeof (char *));
+    ae = a->head;
+    for (i = 0; i < n; i++) {		/* collect the value pointers */
+	ae = element_forw (ae);
+	base[i] = element_value (ae);
+    }
+
+    if (integer_p (var)) 
+	qsort (base, n, sizeof (char *), my_intcmp);
+    else
+	qsort (base, n, sizeof (char *), my_strcmp);
+
+    ae = a->head;
+    for (i = 0; i < n; i++) {		/* hand them back in sorted order */
+	ae = element_forw (ae);
+	element_value (ae) = base[i];
+    }
+    free (base);
+}
+
+
+/* Copied from bind_array_variable() in ../arrayfunc.c.  Find the last index and
+ * append right after it.  Actually, array_insert() in ../array.c inserts it
+ * "before" the head, which is effectively appending it because ARRAY is
+ * circular linked list.  Similar to list.append() in Python.
+ */
+static void
+array_append (var, arg)
+    SHELL_VAR *var;
+    char *arg;		/* raw string */
+{
+    char *value;
+    arrayind_t N;
+    
+    if (readonly_p (var)) 
+	err_readonly (var->name);
+    else if (noassign_p (var))
+	return;
+    else {
+	N = array_max_index (array_cell (var));		/* -1 if empty */
+	value = make_variable_value (var, arg);
+	if (var->assign_func)		/* variable has its own assignment hook */
+	    (*var->assign_func) (var, value, N+1);
+	else
+	    array_insert (array_cell (var), N+1, value);
+	FREE (value);	/* NOTE(review): presumes the callee keeps its own copy -- confirm */
+    }
+}
+
+
+int
+array_builtin (list)
+    WORD_LIST *list;
+{
+#if defined (ARRAY_VARS)
+    char *name, *arg, *sep, *value, *regex;
+    SHELL_VAR *var;
+    int flag, opt;
+
+    flag = 0;
+    sep = value = regex = (char *)NULL;
+
+    reset_internal_getopt ();
+    while ((opt = internal_getopt (list, "kvlrsci:j:E:V:")) != -1) {
+	switch (opt) {
+	case 'k':
+	case 'v':
+	case 'l':
+	case 'r':
+	case 's':
+	case 'c':
+	    flag = opt;
+	    break;
+	case 'i':
+	    flag = opt;
+	    value = list_optarg;
+	    break;
+	case 'j':
+	    flag = opt;
+	    sep = list_optarg;
+	    break;
+	case 'E':
+	case 'V':
+	    flag = opt;
+	    regex = list_optarg;
+	    break;
+	default:
+	    builtin_usage ();
+	    return (EX_USAGE);
+	}
+    }
+    list = loptend;
+
+    if (list == 0) 		/* no array name: nothing to do */
+	return (EXECUTION_SUCCESS);
+
+    name = list->word->word;		/* first argument */
+    if (legal_identifier(name) == 0) {
+	sh_invalidid (name);
+	return (EXECUTION_FAILURE);
+    }
+    var = find_variable (name);
+    if (var == 0 || array_p (var) == 0) {
+	sh_notfound (name);
+	return (EXECUTION_FAILURE);
+    }
+
+    list = list->next;
+
+    if (list == 0) {		/* 1 argument: array [...] name */
+	switch (flag) {
+	case 'r':		/* array -r name */
+	    array_reverse (var);
+	    break;
+	case 's':		/* array -s name */
+	    array_sort (var, flag);
+	    break;
+	case 'c':		/* array -c name */
+	    array_collapse (var);
+	    break;
+	case 'i':		/* array -i value name */
+	    print_all_indexes_with_value (var, value);
+	    break;
+	case 'j':		/* array -j sep name */
+	    arg = array_to_string (array_cell (var), sep, 0 /* no quoting */);
+	    if (arg) { puts (arg); free (arg); }	/* joined string is allocated for us */
+	    break;
+	case 'k':		/* array -k name */
+	case 'v':		/* array -v name */
+	case 'l':		/* array -l name */
+	default:		/* array name */
+	    print_elements (var, flag);
+	    break;
+	}
+    }
+    
+    /* 2 or more arguments.  So, we are appending.  If 'list == 0' already, then
+     * it falls through.
+     */
+    while (list) {
+	regex_t preg;
+	regmatch_t pmatch[1];
+	char *head, *body, *tail, *subarg;
+	int a, b, eflag;
+
+	arg = list->word->word;		/* array -[EV] regex name arg... */
+
+	if (flag == 'E' || flag == 'V') {
+	    if (regcomp (&preg, regex, REG_EXTENDED) != 0) {
+		builtin_error ("%s: illegal regex", regex);
+		return (EXECUTION_FAILURE);
+	    }
+	    head = body = tail = arg;	/* head: start of the pending unmatched text */
+	    eflag = 0;
+	    while (*head && *body && *tail
+		   && regexec (&preg, body, 1, pmatch, eflag) == 0) {
+		a = pmatch[0].rm_so;	/* match offsets are relative to 'body' */
+		b = pmatch[0].rm_eo;
+		if (a != b) {
+		    body += a;
+		    tail += b;
+		    if (flag == 'E' && body != tail) {	/* -E: keep the match */
+			subarg = substring (body, 0, tail - body);
+			array_append (var, subarg);
+			free (subarg);
+		    }
+		    if (flag == 'V' && head != body) {	/* -V: keep the text before it */
+			subarg = substring (head, 0, body - head);
+			array_append (var, subarg);
+			free (subarg);
+		    }
+		    head = body = tail;
+		} else {		/* empty match: step one character forward */
+		    body++;
+		    tail++;
+		}
+		eflag = REG_NOTBOL;	/* later searches no longer start at line start */
+	    }
+	    if (flag == 'V' && *head)	/* -V: trailing text after the last match */
+		array_append (var, head);
+	    regfree (&preg);
+	} else
+	    array_append (var, arg);	/* append original 'arg' */
+	list = list->next;
+    }
+
+    stupidly_hack_special_variables (name);
+    /* fflush (stdout); */
+    return (EXECUTION_SUCCESS);
+#endif	/* ARRAY_VARS */
+}
+
+
+
+#ifdef EMBED_PYTHON
+#include "Python.h"	/* includes <stdio.h> */
+
+$BUILTIN embeddedpython
+$FUNCTION embeddedpython_builtin
+$DEPENDS_ON EMBED_PYTHON
+$SHORT_DOC embeddedpython [-cq] arg...
+Send the command-line arguments to embedded Python.  Syntax follows the
+normal Python, ie.
+    python scriptfile
+    python -c "command"
+except that multiple files or strings can be used.  By default, the
+arguments are script files, so sequentially send the file contents to
+Python via PyRun_SimpleFile().  With '-c' option, the arguments are command
+strings, so sequentially send the string contents to Python via
+PyRun_SimpleString().  For readability, leading whitespaces in the strings
+are removed.  '-q' stops the embedded Python via Py_Finalize().
+$END
+
+/* Fully embedded Python.  With Python compiled and installed to /usr/local as
+ * usual, Bash can be compiled with
+ *
+ *	./configure 
+ *	make CFLAGS="-DEMBED_PYTHON -I/usr/local/include/python2.2"
+ *	     LDFLAGS="-L/usr/local/lib/python2.2 -L/usr/local/lib/python2.2/config
+ *	              -Xlinker -export-dynamic"
+ *	     LOCAL_LIBS="-lpython2.2 -lpthread -lutil -lm"
+ *
+ * where '-lpython2.2 -lpthread -lutil -lm' were determined from Python's
+ * Makefile, and '-Xlinker -export-dynamic' were determined from 
+ *	import distutils.sysconfig
+ *	distutils.sysconfig.get_config_var('LINKFORSHARED')
+ * as described in Python documentation for embedding.
+ *
+ * If you don't want Python, then simply do
+ *	./configure
+ *	make
+ *
+ * Embedding is no longer needed.  You can run a Python script which repeatedly
+ * runs 'exec' statement.  Script, called 'coprocess.py', would go something
+ * like
+ *
+ *	import sys
+ *	fifo_in = sys.argv[1]
+ *	fifo_out = sys.argv[2]
+ *	while 1:
+ *	    fin = open(fifo_in, "r")
+ *	    fout = open(fifo_out, "w")
+ *	    sys.stdout = fout
+ *	    exec fin
+ *	    sys.stdout.flush()
+ *	    fout.close()
+ *	    fin.close()
+ *
+ * Then, you can do
+ *
+ *	mkfifo in out 
+ *	python coprocess.py in out &
+ *
+ *	echo "print 1.0+2.0" > in
+ *	cat out
+ *	echo "import math" > in
+ *	echo "print math.pi" > in
+ *	cat out
+ *	
+ * --William Park <opengeometry@yahoo.ca>
+ */
+
+int
+embeddedpython_builtin (list)
+    WORD_LIST *list;
+{
+    char *arg;
+    int opt, cflag, out;
+
+    cflag = 0;		/* 0: arguments are script files; 1: command strings */
+
+    reset_internal_getopt ();
+    while ((opt = internal_getopt (list, "cq")) != -1) {
+	switch (opt) {
+	case 'c':
+	    cflag = 1;
+	    break;
+	case 'q':
+	    Py_Finalize();	/* runs immediately, while options are still being parsed */
+	    break;
+	default:
+	    builtin_usage ();
+	    return (EX_USAGE);
+	}
+    }
+    list = loptend;
+
+    if (list == 0) 		/* no arguments: nothing to run */
+	return (EXECUTION_SUCCESS);
+
+    if (! Py_IsInitialized())	/* start the interpreter on first use */
+	Py_Initialize();
+
+    if (cflag) {		/* send string */
+	char *t;	/* NOTE(review): appears unused in this block */
+
+	for ( ; list; list = list->next) {
+	    arg = list->word->word;
+	    while (*arg && spctabnl (*arg) && isifs (*arg))
+		arg++;	/* NOTE(review): skips only chars that pass BOTH tests -- confirm intent */
+	    out = PyRun_SimpleString (arg);
+	    if (out)		/* nonzero result: stop at the first failure */
+		return (EXECUTION_FAILURE);
+	}
+    } else {			/* send file */
+	FILE *fd;
+
+	for ( ; list; list = list->next) {
+	    arg = list->word->word;
+	    fd = fopen (arg, "r");
+	    if (fd == NULL) {
+		builtin_error ("cannot open file `%s'", arg);
+		return (EXECUTION_FAILURE);
+	    }
+	    out = PyRun_SimpleFile (fd, arg);
+	    fclose (fd);
+	    if (out)		/* nonzero result: stop at the first failure */
+		return (EXECUTION_FAILURE);
+	}
+    }
+
+    fflush (stdout);
+    return (EXECUTION_SUCCESS);
+}
+#endif	/* EMBED_PYTHON */
diff -r -u bash-2.05b/builtins/read.def bash/builtins/read.def
--- bash-2.05b/builtins/read.def	2002-03-19 14:33:41.000000000 -0500
+++ bash/builtins/read.def	2004-01-02 16:46:39.000000000 -0500
@@ -23,7 +23,7 @@
 
 $BUILTIN read
 $FUNCTION read_builtin
-$SHORT_DOC read [-ers] [-u fd] [-t timeout] [-p prompt] [-a array] [-n nchars] [-d delim] [name ...]
+$SHORT_DOC read [-ersDN] [-u fd] [-t timeout] [-p prompt] [-a array] [-n nchars] [-d delim] [-F x,y,z,...] [-S x~y] [name ...]
 One line is read from the standard input, or from file descriptor FD if the
 -u option is supplied, and the first word is assigned to the first NAME,
 the second word to the second NAME, and so on, with leftover words assigned
@@ -45,6 +45,14 @@
 its value is the default timeout.  The return code is zero, unless end-of-file
 is encountered, read times out, or an invalid file descriptor is supplied as
 the argument to -u.
+
+Awk-style NF and NR shell variables can be obtained by -N option.  The -F
+option takes comma separated positive integers, denoting the sizes of
+fieldwidth, similar to Awk's FIELDWIDTHS.  The -S option will use Sed line
+address expression x~y to skip fields.  DOS lines with '\r\n' end-of-line
+can be read with -D option.  Array assignment is not affected by -F or -S
+options.
+
 $END
 
 #include <config.h>
@@ -140,6 +148,15 @@
   int rlind;
 #endif
 
+
+  int awk_NF_NR = 0;			/* Awk's NF and NR variables */
+  char *fieldwidths = (char *)NULL;	/* fixed fieldwidths */
+  WORD_LIST *fwlist, *fw;
+  int skip_like_sed = 0;	/* skip IFS fields, like Sed's x~y */
+  intmax_t x, y;
+  int dos_EOL = 0;		/* read DOS lines with '\r\n' end-of-line */
+
+
   USE_VAR(size);
   USE_VAR(i);
   USE_VAR(pass_next);
@@ -175,7 +192,7 @@
   delim = '\n';		/* read until newline */
 
   reset_internal_getopt ();
-  while ((opt = internal_getopt (list, "ersa:d:n:p:t:u:")) != -1)
+  while ((opt = internal_getopt (list, "ersa:d:n:p:t:u:DF:NS:")) != -1)
     {
       switch (opt)
 	{
@@ -239,6 +256,31 @@
 	case 'd':
 	  delim = *list_optarg;
 	  break;
+	case 'F':
+	    fieldwidths = list_optarg;
+	    break;
+	case 'N':
+	    awk_NF_NR = 1;
+	    break;
+	case 'S':
+	    skip_like_sed = 1;
+	    t = xstrchr (list_optarg, '~');
+	    if (t) {
+		t1 = list_optarg;
+		*t++ = '\0';
+		if (! (*t1 && legal_number (t1, &x) && x >= 0 &&
+			    *t && legal_number (t, &y) && y >= 0)) {
+		    builtin_error ("`%s~%s': must be `int~int'", t1, t);
+		    return (EXECUTION_FAILURE);
+		}
+	    } else {
+		builtin_error ("`%s': must be `int~int'", list_optarg);
+		return (EXECUTION_FAILURE);
+	    }
+	    break;
+	case 'D':
+	    dos_EOL = 1;
+	    break;
 	default:
 	  builtin_usage ();
 	  return (EX_USAGE);
@@ -454,6 +496,28 @@
 	break;
     }
   input_string[i] = '\0';
+ 
+
+  /* Read lines in DOS format (\r\n).  So, if we are reading by lines (ie. delim
+   * == '\n' and nchars == 0), then remove the extra '\r' at the end of string.
+   * This is equivalent to
+   *	read
+   *	read a b c ... <<< "${REPLY%$'\r'}"
+   * or
+   *	read
+   *	REPLY=`echo $REPLY | sed 's/^M$//'`	(use ^V^M)
+   *	read a b c ... <<< "$REPLY"
+   * or 
+   *	fromdos | read ...
+   * But, this avoids extra buffering of pipes and subshells, which is useful
+   * for reading from HTTP connection, since EOL there is defined as '\r\n' as
+   * well.
+   *
+   * --William Park <opengeometry@yahoo.ca>
+   */
+  if (dos_EOL && nchars == 0 && delim == '\n' && i > 0 && input_string[i-1] == '\r')
+      input_string[--i] = '\0';
+
 
 #if 1
   if (retval < 0)
@@ -492,6 +556,37 @@
 
   retval = eof ? EXECUTION_FAILURE : EXECUTION_SUCCESS;
 
+
+  /* The total number of IFS fields will be assigned to shell variable NF (like
+   * Awk variable NF).  You can feed input through
+   *	awk '{print NF, $0}'
+   * or use array variable
+   *	read -a var
+   *	NF=${#var[*]}
+   * but 
+   *	read -N
+   * is less typing.
+   *
+   * Also, shell variable NR is assigned the number of lines read so far.
+   *
+   * --William Park <opengeometry@yahoo.ca>
+   */
+  if (awk_NF_NR) {
+      intmax_t n;
+
+      fwlist = list_string (input_string, ifs_chars, 0);
+      n = list_length ((GENERIC_LIST *)fwlist);
+      bind_var_to_int ("NF", n);
+      dispose_words (fwlist);
+
+      t = get_string_value ("NR");
+      if (t && *t && legal_number (t, &n) && n >= 0)
+	  bind_var_to_int ("NR", n + 1);
+      else 
+	  bind_var_to_int ("NR", 1);
+  }
+
+
 #if defined (ARRAY_VARS)
   /* If -a was given, take the string read, break it into a list of words,
      an assign them to `arrayname' in turn. */
@@ -545,6 +640,82 @@
       return (retval);
     }
 
+
+  /* Loop over the variables, and assign the fixed width fields, starting from
+   * the beginning of the line.  Leftover variables are assigned '' (null).
+   * It is here because it supersedes the normal IFS parsing of 'read'.  This
+   * mimics Awk's FIELDWIDTHS with spaces replaced by commas, i.e.
+   *	read -F 2,5,10
+   * will cut 2 char, 5 char, and 10 char strings out of the input line.
+   *
+   * --William Park <opengeometry@yahoo.ca>
+   */
+  if (fieldwidths) {
+      WORD_LIST *out;
+      intmax_t n;
+      
+      out = (WORD_LIST *)NULL;
+
+      fwlist = list_string (fieldwidths, ",", 0);
+      i = 0;
+      for (fw = fwlist; fw; fw = fw->next) {
+	  t = fw->word->word;
+	  if (i < strlen (input_string)) {
+	      if (*t && legal_number (t, &n) && n >= 0) {
+		  t = substring (input_string, i, i + n);
+		  i += n;
+	      } else {
+		  builtin_error ("%s: invalid fixed fieldwidths", t);
+		  dispose_words (fwlist);
+		  dispose_words (out);
+		  return (EXECUTION_FAILURE);
+	      }
+	  } else
+	      t = savestring ("");
+	  out = add_string_to_list (t, out);
+	  free (t);
+      }
+      dispose_words (fwlist);
+
+      fw = fwlist = REVERSE_LIST (out, WORD_LIST *);
+      for ( ; list; list = list->next) {	/* copied from below */
+	  varname = list->word->word;
+#if defined (ARRAY_VARS)
+	  if (legal_identifier (varname) == 0 && valid_array_reference (varname) == 0)
+#else
+	  if (legal_identifier (varname) == 0)
+#endif
+	  {
+	      sh_invalidid (varname);
+	      xfree (input_string);
+	      dispose_words (fwlist);
+	      return (EXECUTION_FAILURE);
+	  }
+	  if (skip_like_sed) {
+	      while (fw && --x > 0)
+		  fw = fw->next;
+	      x = y;		/* for next time */
+	  }
+	  if (fw) {
+	      var = bind_read_variable (varname, fw->word->word);
+	      fw = fw->next;
+	  } else 
+	      var = bind_read_variable (varname, "");
+	  if (var == 0) {
+	      xfree (input_string);
+	      dispose_words (fwlist);
+	      return (EXECUTION_FAILURE);
+	  }
+	  stupidly_hack_special_variables (varname);
+	  VUNSETATTR (var, att_invisible);
+      }
+      dispose_words (fwlist);
+
+      xfree (input_string);
+      return (retval);
+  }
+
+
   /* This code implements the Posix.2 spec for splitting the words
      read and assigning them to variables. */
   orig_input_string = input_string;
@@ -573,6 +744,26 @@
 	 the remaining variables are set to the empty string. */
       if (*input_string)
 	{
+
+	  /* Skip like Sed-style line address x~y.  Counting from 1, start
+	   * assigning variables at the x'th field, then advance y fields
+	   * for each subsequent variable.
+	   *	read -S 1~2 	--> 1st 3rd 5th fields (ie. odd fields)
+	   *	read -S 2~2	--> 2nd 4th 6th fields (ie. even fields)
+	   *
+	   * --William Park <opengeometry@yahoo.ca>
+	   */
+	  if (skip_like_sed) {
+	      while (*input_string && --x > 0) {
+		  /* This call updates INPUT_STRING. */		/* copied from below */
+		  t = get_word_from_string (&input_string, ifs_chars, &e);
+		  if (t)
+		      *e = '\0';
+		  FREE (t);
+	      }
+	      x = y;		/* for next time */
+	  }
+
 	  /* This call updates INPUT_STRING. */
 	  t = get_word_from_string (&input_string, ifs_chars, &e);
 	  if (t)
diff -r -u bash-2.05b/execute_cmd.c bash/execute_cmd.c
--- bash-2.05b/execute_cmd.c	2002-03-18 13:24:22.000000000 -0500
+++ bash/execute_cmd.c	2004-01-02 16:46:39.000000000 -0500
@@ -1527,15 +1527,76 @@
   SHELL_VAR *old_value = (SHELL_VAR *)NULL; /* Remember the old value of x. */
 #endif
 
-  if (check_identifier (for_command->name, 1) == 0)
-    {
-      if (posixly_correct && interactive_shell == 0)
+  /* 
+   * Enable multiple loop variables in for-loop, with syntax
+   *	for  a,b,c,...  in list; do
+   *	    ...
+   *	done
+   * where no space is allowed around ',' (comma) because only one word is
+   * parsed.  List items are sequentially assigned to the loop variables 'a',
+   * 'b', 'c', etc.  If the list runs short of items, then the last iteration
+   * will run with '' (null) assigned to leftover variables.
+   *
+   * For the special case of 
+   *	for  x~y,a,b,c,...  in list; do
+   *	    ...
+   *	done
+   * where 'x' and 'y' are integers, loop variables will be sequentially
+   * assigned starting at the x'th item, then advancing by y items before each
+   * following iteration (counting from 1).  Minimum value for 'x' and 'y' is
+   * 1; 0 is promoted to 1.  For example, '1~2,a' means start at the 1st item
+   * and step by 2 items after that, so every odd item (1,3,5,...) is selected.
+   * This is similar to 'sed' line address expression 'x~y'.
+   *
+   * --William Park <opengeometry@yahoo.ca>
+   */
+  int multi_variables, skip_like_sed;
+  intmax_t x, y;
+  WORD_LIST *list_of_for_variables, *fv;
+
+  multi_variables = skip_like_sed = 0;
+
+  if (xstrchr (for_command->name->word, ',') != NULL) {		/* split 'a,b,c,...' */
+      char *t;
+
+      multi_variables = 1;
+      list_of_for_variables = word_split (for_command->name, ",");
+
+      identifier = list_of_for_variables->word->word;
+      t = xstrchr (identifier, '~');
+      if (t) {		/* special case of 'x~y,a,b,c,...' */
+	  skip_like_sed = 1;
+	  *t++ = '\0';
+	  if (! (*identifier && legal_number (identifier, &x) && x >= 0 &&
+		      *t && legal_number (t, &y) && y >= 0)) {
+	      internal_error ("`%s~%s': must be `int~int'", identifier, t);
+	      goto Exit_by_Original_Code;
+	  }
+	  fv = list_of_for_variables;		/* delete 'x~y' */
+	  list_of_for_variables = list_of_for_variables->next;
+	  fv->next = (WORD_LIST *)NULL;
+	  dispose_words (fv);
+      }
+  }
+  
+  /* Check if a, b, c, ... are legal shell variables.
+   */
+  if (multi_variables) {
+      for (fv = list_of_for_variables; fv; fv = fv->next)
+	  if (check_identifier (fv->word, 1) == 0)
+	      goto Exit_by_Original_Code;
+  } else {
+      if (check_identifier (for_command->name, 1) == 0)		/* original code */
 	{
-	  last_command_exit_value = EX_USAGE;
-	  jump_to_top_level (EXITPROG);
+Exit_by_Original_Code:
+	  if (posixly_correct && interactive_shell == 0)
+	    {
+	      last_command_exit_value = EX_USAGE;
+	      jump_to_top_level (EXITPROG);
+	    }
+	  return (EXECUTION_FAILURE);
 	}
-      return (EXECUTION_FAILURE);
-    }
+  }
 
   loop_level++;
   identifier = for_command->name->word;
@@ -1561,21 +1622,53 @@
     {
       QUIT;
       this_command_name = (char *)NULL;
-      v = bind_variable (identifier, list->word->word);
-      if (readonly_p (v) || noassign_p (v))
-	{
-	  if (readonly_p (v) && interactive_shell == 0 && posixly_correct)
-	    {
-	      last_command_exit_value = EXECUTION_FAILURE;
-	      jump_to_top_level (FORCE_EOF);
-	    }
-	  else
+      
+      /* Assign list items into a, b, c, ...
+       */
+      if (multi_variables) {
+	  if (skip_like_sed) {
+	      while (list && --x > 0)
+		  list = list->next;
+	      if (list == 0) break;		/* end of list */
+	      x = y;		/* for next time */
+	  }
+
+	  for (fv = list_of_for_variables; fv; fv = fv->next) {
+	      identifier = fv->word->word;
+	      if (list) {
+		  /*
+		   * Goto the next item in the list, only if there are more
+		   * variables to assign.  If finished assigning, then leave the
+		   * incrementing for the next iteration.
+		   */
+		  v = bind_variable (identifier, list->word->word);
+		  if (fv->next)
+		      list = list->next;
+	      } else 			/* no more items */
+		  v = bind_variable (identifier, "");
+	      if (readonly_p (v) || noassign_p (v))
+		  goto Exit_by_Original_Code_2;
+	  }
+
+      } else {
+	  v = bind_variable (identifier, list->word->word);		/* original code */
+	  if (readonly_p (v) || noassign_p (v))
 	    {
-	      run_unwind_frame ("for");
-	      loop_level--;
-	      return (EXECUTION_FAILURE);
+Exit_by_Original_Code_2:
+	      if (readonly_p (v) && interactive_shell == 0 && posixly_correct)
+		{
+		  last_command_exit_value = EXECUTION_FAILURE;
+		  jump_to_top_level (FORCE_EOF);
+		}
+	      else
+		{
+		  run_unwind_frame ("for");
+		  loop_level--;
+		  return (EXECUTION_FAILURE);
+		}
 	    }
-	}
+      }
+
       retval = execute_command (for_command->action);
       REAP ();
       QUIT;
@@ -1592,6 +1685,8 @@
 	  if (continuing)
 	    break;
 	}
+
+      if (multi_variables && list == 0) break;
     }
 
   loop_level--;
@@ -1612,6 +1707,8 @@
     }
 #endif
 
+  if (multi_variables)
+      dispose_words (list_of_for_variables);
   dispose_words (releaser);
   discard_unwind_frame ("for");
   return (retval);
diff -r -u bash-2.05b/subst.c bash/subst.c
--- bash-2.05b/subst.c	2004-01-02 22:12:23.000000000 -0500
+++ bash/subst.c	2004-01-02 16:46:39.000000000 -0500
@@ -4115,6 +4115,11 @@
   t = temp ? savestring (temp) : savestring ("");
   t1 = dequote_string (t);
   free (t);
+#if defined (ARRAY_VARS)
+  if (valid_array_reference (name))
+      assign_array_element (name, t1);
+  else
+#endif
   bind_variable (name, t1);
   free (t1);
   return (temp);
@@ -4359,7 +4364,7 @@
 #if defined (ARRAY_VARS)
     case VT_ARRAYVAR:
       a = (ARRAY *)value;
-      len = array_num_elements (a) + 1;
+      len = array_num_elements (a);
       break;
 #endif
     }
@@ -4475,6 +4480,9 @@
   char *temp, *val, *tt;
   SHELL_VAR *v;
 
+  int skip_like_sed;
+  intmax_t x, y;
+
   if (value == 0)
     return ((char *)NULL);
 
@@ -4484,15 +4492,53 @@
   if (vtype == -1)
     return ((char *)NULL);
 
-  r = verify_substring_values (val, substr, vtype, &e1, &e2);
-  if (r <= 0)
-    return ((r == 0) ? &expand_param_error : (char *)NULL);
+  /* Check for Sed-style 'x~y' skipping, where 'x' and 'y' are non-negative
+   * integers.  Ie.
+   *	${*:1~2}, ${@:1~2}, ${array[*]:1~2}, ${array[@]:1~2}, ${string:1~2}
+   * each give every other positional parameter, array element, or string
+   * character, respectively, starting at 1.
+   *
+   * Whitespace is not allowed, in order to differentiate from a valid
+   * arithmetic bitwise negation (~).
+   *
+   * --William Park <opengeometry@yahoo.ca>
+   */
+  skip_like_sed = 0;
+  tt = xstrchr (substr, '~');
+  if (tt) {
+      *tt++ = '\0';
+      if (*substr && all_digits (substr) && legal_number (substr, &x) && x >= 0 &&
+	      *tt && all_digits (tt) && legal_number (tt, &y) && y >= 0)
+	  skip_like_sed = 1;
+      tt[-1] = '~';		/* restore the original string */
+  }
+  if (! skip_like_sed) {		/* original code */
+      r = verify_substring_values (val, substr, vtype, &e1, &e2);
+      if (r <= 0)
+	return ((r == 0) ? &expand_param_error : (char *)NULL);
+  }
 
   switch (vtype)
     {
     case VT_VARIABLE:
     case VT_ARRAYMEMBER:
-      tt = substring (val, e1, e2);
+      if (skip_like_sed) {
+	  int i, n;
+	  char *s;
+
+	  n = strlen (val);
+	  if (n == 0)
+	      return (char *)NULL;
+
+	  s = tt = (char *)xmalloc (n + 1);
+	  if (y <= 0) 
+	      y = 1;		/* don't want infinite loop */
+	  for (i = x; i < n; i += y)
+	      *s++ = val[i];
+	  *s = '\0';
+      } else
+	  tt = substring (val, e1, e2);               /* original code */
+
       if (vtype == VT_VARIABLE)
 	FREE (val);
       if (quoted & (Q_DOUBLE_QUOTES|Q_HERE_DOCUMENT))
@@ -4501,8 +4547,38 @@
 	temp = tt ? quote_escapes (tt) : (char *)NULL;
       FREE (tt);
       break;
+
     case VT_POSPARMS:
-      tt = pos_params (varname, e1, e2, quoted);
+      if (skip_like_sed) {
+	  WORD_LIST *out, *plist, *p;
+	  
+	  plist = list_rest_of_args ();
+	  if (plist == 0) 
+	      return (char *)NULL;
+
+	  out = (WORD_LIST *)NULL;
+	  for (p = plist; p; p = p->next) {
+	      while (p && --x > 0)
+		  p = p->next;
+	      if (p == 0) 
+		  break;
+	      out = make_word_list (make_bare_word (p->word->word), out);
+	      x = y;		/* for next time */
+	  }
+	  out = REVERSE_LIST (out, WORD_LIST *);
+
+	  if (varname[0] == '*')		/* copied from pos_params() */
+	      tt = (quoted & (Q_HERE_DOCUMENT|Q_DOUBLE_QUOTES)) ?
+		  string_list_dollar_star (quote_list (out)) : string_list (out);
+	  else
+	      tt = string_list ((quoted & (Q_HERE_DOCUMENT|Q_DOUBLE_QUOTES)) ?
+		      quote_list (out) : out);
+
+	  dispose_words (out);
+	  dispose_words (plist);
+      } else 
+	  tt = pos_params (varname, e1, e2, quoted);          /* original code */
+
       if ((quoted & (Q_DOUBLE_QUOTES|Q_HERE_DOCUMENT)) == 0)
 	{
 	  temp = tt ? quote_escapes (tt) : (char *)NULL;
@@ -4513,7 +4589,29 @@
       break;
 #if defined (ARRAY_VARS)
     case VT_ARRAYVAR:
-      tt = array_subrange (array_cell (v), e1, e2, quoted);
+      if (skip_like_sed) {
+	  ARRAY *out, *a;
+	  ARRAY_ELEMENT	*ae;
+
+	  a = array_cell (v);
+	  if (a == 0 || array_empty (a) || x > array_num_elements (a))
+	      return (char *)NULL;
+
+	  out = array_create ();
+	  x++;		/* only for first time, since array starts at 0 */
+	  for (ae = element_forw (a->head); ae != a->head; ae = element_forw (ae)) {
+	      while (ae != a->head && --x > 0)
+		  ae = element_forw (ae);
+	      if (ae == a->head)
+		  break;
+	      array_insert (out, element_index (ae), element_value (ae));
+	      x = y;	/* for next time */
+	  }
+	  tt = array_to_string (out, " ", quoted);
+	  array_dispose (out);
+      } else 
+	  tt = array_subrange (array_cell (v), e1, e2, quoted);		/* original code */
+
       if ((quoted & (Q_DOUBLE_QUOTES|Q_HERE_DOCUMENT)) == 0)
 	{
 	  temp = tt ? quote_escapes (tt) : (char *)NULL;


Relevant Pages


Quantcast