logo       
Google Custom Search
    AddThis Social Bookmark Button
-->

Extlib.list.unique: msg#00013

Subject: Extlib.list.unique
Hello evrybody.

Thanks for that lib I use evryday.

But, I notice that ExtList.unique is in O(n^2) this is a problem for me,
I often use list over 100000 elements(and some time over million).
So I wrote a replacement function that is faster O(n+n*log(n)).
It use about 5X the space of the parameter list. On my computer (512 Mo)
the limit before the stak overflow is just over a million.


let delete_double_unsort ?(last=false) ?(cmp=Pervasives.compare) l =
  let lg = List.length l in
      let tcn = Array.make lg (List.hd l,0) in
      let rec add_num n = function
          [] -> ()
        | h :: t -> tcn.(n) <- (h,n) ; (add_num (succ n) t)
      in
      let () = add_num 0 l in
      let compare x y = cmp (fst x) (fst y) in
      let tcn' = Array.copy tcn in
      let () = Array.stable_sort compare tcn in
      let plg = lg -1 in
      let rec get_dbl ac n =
        if n == plg then ac
        else
          let n' = n+1 in
          let h1 = tcn.(n) in
          let h2 = tcn.(n') in
          if (cmp (fst h1) (fst h2)) == 0
          then get_dbl ((snd (if last then h1 else h2))::ac) n'
          else get_dbl ac n'
      in
      let dbl = get_dbl [] 0 in
      let sdbl = List.sort Pervasives.compare dbl in
      let rec delete n = function
          [] ->
            let ac = ref [] in
            for i = n to lg -1 do
              ac := (fst tcn'.(i)) :: !ac
            done;
            List.rev !ac
        | na::ta when na == (snd tcn'.(n)) -> delete (n+1) ta
        | l ->  (fst tcn'.(n)) :: (delete (n+1) l)
      in delete 0 sdbl
;;

As you can see this function support a additionnal option that let you
select if you want delete the first or the last occurence of a term.
For the "equivalence" with ExtList.unique set "~last:true".


for 100000 elements :
Extlib.list.unique : 165.850 seconds
this function : 0.380 seconds
for 1000000 elements :
Extlib.list.unique : stop by error after 2 hours...
this function : 9.600 seconds



Even if you don't want to include it in the ExtLib I would be glade if
someone know how to improve it.



Here few test code:
(*
let bench n p =
  Random.self_init();
  let rec new_list n =
    if n == 0 then [] else (Random.int 100000)::(new_list (pred n))
  in
  let a_bench n =
    let l  = new_list n in Gc.compact();
    (*let t1 = Sys.time() in  ignore (delete_double_unsort_list l);*)
    (*let t1 = Sys.time() in  ignore (delete_double l);*)
    let t1 = Sys.time() in ignore ();(*(ExtLib.List.unique l);*)
    let t2 = Sys.time() in Gc.compact();
    (*let t3 = Sys.time() in ignore (ExtLib.List.unique l);*)
    let t3 = Sys.time() in ignore (delete_double_unsort l);
    let t4 = Sys.time() in
     (t2-.t1,t4-.t3)
  in
  let rec make_bench r n p =
    if p == 0 then r else make_bench ((a_bench n)::r) n (pred p)
  in
  let somme r =
    let a,b = List.split r in
      List.fold_left (+.) 0. a, List.fold_left (+.) 0. b
  in
  let t1,t2 = somme (make_bench [] n p) in
    Printf.printf "\n[%.3f] [%.3f]\n" t1 t2
;;
bench (int_of_string Sys.argv.(1)) (int_of_string Sys.argv.(2))
*)
(*
let verif n =
  Random.self_init();
  let rec new_list n =
    if n == 0 then [] else (Random.int 10)::(new_list (pred n))
  in
  let test l =
    let p l =  List.iter (Printf.printf "%d ") l in
   (*   p l ;print_newline (); *)
    let l1 = delete_double_unsort_list ~last:true l
    and l2 = delete_double_unsort ~last:true l
    and l3 = ExtLib.List.unique l
    in
 (*   p l1;print_newline ();
    p l2;print_newline ();
    p l3;print_newline (); *)
    (l1 = l2 && l1 = l3)
  in
    while (test (new_list n)) do (); done
;;
verif (int_of_string Sys.argv.(1));;
*)


        

        
                
___________________________________________________________________________ 
Appel audio GRATUIT partout dans le monde avec le nouveau Yahoo! Messenger 
Téléchargez cette version sur http://fr.messenger.yahoo.com



-------------------------------------------------------
This SF.Net email is sponsored by:
Power Architecture Resource Center: Free content, downloads, discussions,
and more. http://solutions.newsforge.com/ibmarch.tmpl


<Prev in Thread] Current Thread [Next in Thread>