summary refs log tree commit diff
diff options
context:
space:
mode:
authorLeah Neukirchen <leah@vuxu.org>2022-11-15 01:20:07 +0100
committerLeah Neukirchen <leah@vuxu.org>2022-11-15 01:20:07 +0100
commit2f6a20240df228e71503c2dc1e3a41e75d50f54b (patch)
tree7e9347346ccbb697a229d1bfda3fd596ed106aa1
parent4e97f590bcf7d039744ba4c8a94a9cda333abbf9 (diff)
downloadmew-2f6a20240df228e71503c2dc1e3a41e75d50f54b.tar.gz
mew-2f6a20240df228e71503c2dc1e3a41e75d50f54b.tar.xz
mew-2f6a20240df228e71503c2dc1e3a41e75d50f54b.zip
add search
-rw-r--r--mew.scm29
-rw-r--r--mew.svnwiki7
-rw-r--r--tests/test.mew18
3 files changed, 53 insertions, 1 deletions
diff --git a/mew.scm b/mew.scm
index 902f750..7756746 100644
--- a/mew.scm
+++ b/mew.scm
@@ -17,7 +17,7 @@
      one-of op op*
      per prn proj puts
      range rep
-     sing? seq set set-at str slurp
+     sing? search seq set set-at str slurp
      tally-accumulator tbl time
      while
      uniq-accumulator unlist until
@@ -800,6 +800,33 @@
                      (map list (cdr exn))))
       (apply fail '(exn) exn args)))
 
+  ;; The QuickSearch algorithm, for generic sequences
+  ;; http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190
+  (define search
+    (case-lambda
+      ((needle haystack) (search needle haystack 0))
+      ((needle haystack start)
+       (let* ((lh (len haystack))
+              (ln (len needle))
+              (t  (into (tbl) (gmap (lambda (c i)
+                                      (cons c (- ln i)))
+                                    (gen needle)
+                                    (range 0)))))
+         (def (match? offset)
+           (let loop ((i (dec ln)))
+             (and (=? (at haystack (+ i offset)) (at needle i))
+                  (or (zero? i)
+                      (loop (dec i))))))
+
+         (if (zero? ln)
+           start
+           (let loop ((i start))
+             (if (<= i (- lh ln))
+               (if (match? i)
+                 i
+                 (loop (+ i (get t (get haystack (+ i ln) #f) (inc ln)))))
+               #f)))))))
+
   (let ((old-repl-prompt (repl-prompt)))
     (repl-prompt (lambda ()
                    (let ((old-prompt (old-repl-prompt)))
diff --git a/mew.svnwiki b/mew.svnwiki
index b720eb9..03d91c7 100644
--- a/mew.svnwiki
+++ b/mew.svnwiki
@@ -331,6 +331,13 @@ If {{<obj>}} is a procedure, consider it a SRFI-158 generator
 and iterate over its values.
 If {{<obj>}} is a hash-table, iterate over its keys and values.
 
+<procedure>(search <needle> <haystack> <offset>?)</procedure>
+
+Returns the offset of the sequence (string/list/vector) {{<needle>}}
+in the sequence {{<haystack>}},
+starting no earlier than {{<offset>>}} (default: 0).
+Returns false if the sequence cannot be found.
+
 
 == Generators and Accumulators
 
diff --git a/tests/test.mew b/tests/test.mew
index a6632e9..b895780 100644
--- a/tests/test.mew
+++ b/tests/test.mew
@@ -498,4 +498,22 @@
   (test "foo 1 2" (condition-case (fail "foo ~a ~a" 1 2)
                     (e (exn) (get-condition-property e 'exn 'message)))))
 
+(test-group "search"
+  (test #f (search "foozing" "fooz00000fggg00foozixngbar"))
+  (test 7 (search "foozing" "foodingfoozingbar"))
+  (test 15 (search "foozing" "fooz00000fggg00foozingbar"))
+  (test 16 (search "foozinggg" "foozinggfoozinggfoozingggfoozingg"))
+  (test 0 (search "foobar" "foobar"))
+  (test #f (search "foobar" "foobax"))
+  (test #f (search "foobar" "000000"))
+  (test #f (search "foobar" "fooba"))
+  (test 4 (search "fofofox" "fofofofofox"))
+  (test #f (search "fofofox" ""))
+  (test 0 (search "" "fofofox"))
+  (test 6 (search "x" "fofofox"))
+  (test 4 (search "foo" "fooxfoo" 3))
+  (test #f (search "foo" "fooxfoo" 5))
+  (test 1 (search #(1 2 3) #(0 1 2 3 4)))
+  (test 1 (search '(1 2 3) '(0 1 2 3 4))))
+
 (test-exit)