On Thu, 22 May 2003, Michal Moskal wrote:
> On Thu, May 22, 2003 at 06:51:04PM +0900, Nicolas Cannasse wrote:
> > > > val find : string -> string -> int (* find a substring *)
> > >
> > > Using KMP or some other technique? (i.e. if it takes O(m * n), where m and
> > > n are lengths of strings respectively? or O(log m * n + m))
> >
> > Right now only a naive O(m*n) technique. Please check the CVS, and submit me
> > some code if you have a better implementation.
>
> You can have a look at http://caml.inria.fr/Examples/oc/basics/kmp.ml
>
> I tried rewrite of while loop to tail recursion, but it looks ugly.
>
>
Sanity check this code, if you will:
let kmp p =
let m = String.length p in
let init_next p =
let next = Array.create m 0 in
let rec loop i j =
if i >= (m - 1) then next
else if p.(i) = p.(j) then
begin
next.(i+1) <- j+1;
loop (i+1) (j+1)
end
else if j = 0 then
begin
next.(i+1) <- 0;
loop (i+1) j
end
else
loop i next.(j)
in
loop 1 0
in
let next = init_next p in
let n = String.length s in
let rec loop2 i j =
if (j >= m) || (i >= n) then
if j >= m then i - m
else raise Not_found
else if s.(i) = p.(j) then
loop2 (i+1) (j+1)
else if j = 0 then
loop2 (i+1) j
else
loop2 i next.(j)
in
loop2 0 0
;;
I don't think it's that ugly. And it gets rid of all the references-
which probably means a speed up (haven't tested it yet though). Although
one thing we might want to consider: actually exporting the kmp_init
function and kmp function seperately. This way if I wanted to find the
same substring in a lot of different strings, I don't have to constantly
recreate the next table. Maybe something like:
let _kmp_init_next p m =
let next = Array.create m 0 in
let rec loop i j =
if i >= (m - 1) then next
else if p.(i) = p.(j) then
begin
next.(i+1) <- j+1;
loop (i+1) (j+1)
end
else if j = 0 then
begin
next.(i+1) <- 0;
loop (i+1) j
end
else
loop i next.(j)
in
loop 1 0
let kmp_init_next p = _kmp_init_next p (String.length p)
let _kmp next p s m =
let n = String.length s in
let rec loop2 i j =
if (j >= m) || (i >= n) then
if j >= m then i - m
else raise Not_found
else if s.(i) = p.(j) then
loop2 (i+1) (j+1)
else if j = 0 then
loop2 (i+1) j
else
loop2 i next.(j)
in
loop2 0 0
let kmp next p s = _kmp next p s (String.length p)
let find p s =
let m = String.length p in
let next = _kmp_init_next p m in
_kmp next p s m
Where kmp_init_next and kmp would be exported in the .mli file. You could
then write code like:
let find_all_substring substr strlist =
let next = kmp_init_next substr in
List.find_all (fun s -> try let _ = kmp next substr s in true
with Not_found -> false) strlist
;;
Brian
-------------------------------------------------------
This SF.net email is sponsored by: ObjectStore.
If flattening out C++ or Java code to make your application fit in a
relational database is painful, don't do it! Check out ObjectStore.
Now part of Progress Software. http://www.objectstore.net/sourceforge
|