[Bug 5251] [FO] Minimal match in starts-with(), ends-with()

http://www.w3.org/Bugs/Public/show_bug.cgi?id=5251





------- Comment #2 from mike@saxonica.com  2008-01-15 11:06 -------
After writing the proposal in comment #1 and sleeping on it, I realize that the
approach of defining these functions in terms of simpler functions such as
substring() and concat() could be taken further. Without relying on any
definition of "match" or "minimal match", we can define the functions
contains(), starts-with(), and ends-with() as follows:

contains(): return the result of the XPath expression:

(: true if any substring of $arg1, taken at any start position and with any
   length, compares equal to $arg2 under the collation $coll :)
some $start in 0 to string-length($arg1),
     $length in 0 to string-length($arg1)
satisfies compare(substring($arg1, $start, $length), $arg2, $coll) eq 0

starts-with(): return the result of the XPath expression

(: true if some leading substring of $arg1 (of length 0 up to the full
   length) compares equal to $arg2 under the collation $coll :)
some $length in 0 to string-length($arg1)
satisfies compare(substring($arg1, 1, $length), $arg2, $coll) eq 0

ends-with(): return the result of the XPath expression

(: $N ranges over every possible start position of a trailing substring.
   The upper bound is string-length($arg1) + 1 because that position selects
   the zero-length suffix; without it, a zero-length (or entirely ignorable)
   $arg2 could never match a non-empty $arg1. :)
some $N in 1 to string-length($arg1) + 1 satisfies compare(substring($arg1, $N),
$arg2, $coll) eq 0

substring-before(): return the result of the XQuery expression:

  if (contains($arg1, $arg2, $coll))
  then
    let $Z := string-length($arg1)
    let $M := min((1 to $Z + 1)[some $L in 0 to $Z satisfies
                    compare(substring($arg1, ., $L), $arg2, $coll) eq 0])
        (: M is the position of the start of the first match; the range runs
           to Z+1 so that a position exists even when $arg1 is zero-length :)
    let $L := max((0 to $Z)[compare(substring($arg1, $M, .), $arg2, $coll) eq
                    0])
        (: L is the length of the longest match at this position; the range
           starts at 0 so that a zero-length match is found as well :)
    let $I := max((0 to $L)[compare(substring($arg1, $M + ., $L - .), $arg2,
                    $coll) eq 0])
        (: I is the number of ignorable characters at the start of this match,
           so M+I is the position of the start of the minimal match. Inside
           the predicate "." is the candidate count being tested; $I itself
           is not yet in scope there. :)
    return substring($arg1, 1, $M + $I - 1)
        (: return the substring containing all characters before the start of
           the minimal match :)
  else ""

substring-after(): return the result of the XQuery expression:

  if (contains($arg1, $arg2, $coll))
  then
    let $Z := string-length($arg1)
    let $M := min((1 to $Z + 1)[some $L in 0 to $Z satisfies
                    compare(substring($arg1, ., $L), $arg2, $coll) eq 0])
        (: M is the position of the start of the first match; the range runs
           to Z+1 so that a position exists even when $arg1 is zero-length :)
    let $L := max((0 to $Z)[compare(substring($arg1, $M, .), $arg2, $coll) eq
                    0])
        (: L is the length of the longest match at this position; the range
           starts at 0 so that a zero-length match is found as well :)
    let $I := max((0 to $L)[compare(substring($arg1, $M, $L - .), $arg2,
                    $coll) eq 0])
        (: I is the number of ignorable characters at the end of this match,
           so M+L-I is the position just after the end of the minimal match.
           Inside the predicate "." is the candidate count being tested; $I
           itself is not yet in scope there. :)
    return substring($arg1, $M + $L - $I)
        (: return all characters after the end of the first minimal match :)
  else ""

Received on Tuesday, 15 January 2008 11:06:49 UTC