From 2f6a20240df228e71503c2dc1e3a41e75d50f54b Mon Sep 17 00:00:00 2001 From: Leah Neukirchen Date: Tue, 15 Nov 2022 01:20:07 +0100 Subject: add search --- mew.scm | 29 ++++++++++++++++++++++++++++- mew.svnwiki | 7 +++++++ tests/test.mew | 18 ++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/mew.scm b/mew.scm index 902f750..7756746 100644 --- a/mew.scm +++ b/mew.scm @@ -17,7 +17,7 @@ one-of op op* per prn proj puts range rep - sing? seq set set-at str slurp + sing? search seq set set-at str slurp tally-accumulator tbl time while uniq-accumulator unlist until @@ -800,6 +800,33 @@ (map list (cdr exn)))) (apply fail '(exn) exn args))) + ;; The QuickSearch algorithm, for generic sequences + ;; http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190 + (define search + (case-lambda + ((needle haystack) (search needle haystack 0)) + ((needle haystack start) + (let* ((lh (len haystack)) + (ln (len needle)) + (t (into (tbl) (gmap (lambda (c i) + (cons c (- ln i))) + (gen needle) + (range 0))))) + (def (match? offset) + (let loop ((i (dec ln))) + (and (=? (at haystack (+ i offset)) (at needle i)) + (or (zero? i) + (loop (dec i)))))) + + (if (zero? ln) + start + (let loop ((i start)) + (if (<= i (- lh ln)) + (if (match? i) + i + (loop (+ i (get t (get haystack (+ i ln) #f) (inc ln))))) + #f))))))) + (let ((old-repl-prompt (repl-prompt))) (repl-prompt (lambda () (let ((old-prompt (old-repl-prompt))) diff --git a/mew.svnwiki b/mew.svnwiki index b720eb9..03d91c7 100644 --- a/mew.svnwiki +++ b/mew.svnwiki @@ -331,6 +331,13 @@ If {{}} is a procedure, consider it a SRFI-158 generator and iterate over its values. If {{}} is a hash-table, iterate over its keys and values. +(search ?) + +Returns the offset of the sequence (string/list/vector) {{}} +in the sequence {{}}, +starting no earlier than {{>}} (default: 0). +Returns false if the sequence cannot be found. + == Generators and Accumulators diff --git a/tests/test.mew b/tests/test.mew index a6632e9..b895780 100644 --- a/tests/test.mew +++ b/tests/test.mew @@ -498,4 +498,22 @@ (test "foo 1 2" (condition-case (fail "foo ~a ~a" 1 2) (e (exn) (get-condition-property e 'exn 'message))))) +(test-group "search" + (test #f (search "foozing" "fooz00000fggg00foozixngbar")) + (test 7 (search "foozing" "foodingfoozingbar")) + (test 15 (search "foozing" "fooz00000fggg00foozingbar")) + (test 16 (search "foozinggg" "foozinggfoozinggfoozingggfoozingg")) + (test 0 (search "foobar" "foobar")) + (test #f (search "foobar" "foobax")) + (test #f (search "foobar" "000000")) + (test #f (search "foobar" "fooba")) + (test 4 (search "fofofox" "fofofofofox")) + (test #f (search "fofofox" "")) + (test 0 (search "" "fofofox")) + (test 6 (search "x" "fofofox")) + (test 4 (search "foo" "fooxfoo" 3)) + (test #f (search "foo" "fooxfoo" 5)) + (test 1 (search #(1 2 3) #(0 1 2 3 4))) + (test 1 (search '(1 2 3) '(0 1 2 3 4)))) + (test-exit) -- cgit 1.4.1