logo       

Re: Pyrex v Psyco: msg#00028

python.pyrex

Subject: Re: Pyrex v Psyco

On 19/06/2006 8:59 AM, Philip Smith wrote:
Hi
First of all thanks to Greg for Pyrex (which I have come to very recently and which will certainly make wrapping libraries easier)
I've been looking for an alternative to Psyco for distributing Python apps to other platforms than Win32 - however (while not being in general a 'speed' junkie) I don't want to sacrifice performance.
I've been trying Pyrex out on a small text search programme which uses the Boyer-Moore algorithm and which I have adapted to search for all occurrences of a string in the given text.
With algorithmic improvements I managed something like a 10 fold increase in speed performance and Psyco gives me another 10 fold increase. I've read all the tips and tricks re: Pyrex I can find and tried to implement them but I can't seem to replicate the performance enhancement over Psyco which the Pyrex test suite demonstrates - in fact Psyco wins hands down every time.
Am I (as a newcomer) doing something obviously wrong or is there something about this code that means one wouldn't expect particularly fast execution???

Here's a possibly more useful response than my earlier one, a real live working example:

|>> import strdemo as sd
|>> sd.quick_search.__doc__
'Return a list of the offsets at which <pattern> is found in <text>.'
|>> sd.quick_search('foo', ('fo'* 10 + 'o') * 3)
[18, 39, 60]
|>>

See attached file.
HTH,
John
# Demo Pyrex extension -- string handling
# Author: John Machin (sjmachin@xxxxxxxxxxx)
# Placed in the public domain 2006-06-19
# Minimally tested :-)

# === usually in a .pxi file ===
cdef extern from "Python.h":
    # CPython raw-memory allocator pair.
    void PyMem_Free(void *p)
    void* PyMem_Malloc(int n) except NULL
    # Stores a pointer to the string object's byte buffer in *buff and its
    # size in *length.  The "except -1" clause must sit on the same line as
    # the declaration; it tells Pyrex to propagate the pending Python
    # exception whenever the call returns -1.
    int PyString_AsStringAndSize(object obj, char **buff, int *length) except -1
    # Non-zero when obj is a string object.
    int PyString_Check(object obj)
    # Returns a Python object, so no explicit "except NULL" clause is
    # written here (the author left it as a reminder only).
    object PyString_FromStringAndSize(char *s, int len) # except NULL

cdef union ptr_union:
    # Two views of one pointer-to-pointer, so an `unsigned char **` can be
    # handed to an API that is declared to take `char **`.
    # Author's note: "I hate signed char :-)  Greg doesn't :-("
    char **ptr
    unsigned char **ucptr

# Shorthand for one unsigned byte.
ctypedef unsigned char UC

# Pointer to unsigned bytes; strequal() and quick_search() declare their
# string buffers with this type.
ctypedef unsigned char *UCP

cdef string_in(object obj, unsigned char **buff, int *length):
    # Point *buff at the byte buffer of the Python string `obj` and store
    # its size in *length.
    #
    # Raises TypeError if `obj` is not a string object.  Any error reported
    # by PyString_AsStringAndSize (declared "except -1") propagates too.
    #
    # NOTE(review): per the CPython C-API docs the returned buffer is the
    # string's own internal storage -- callers must not free or modify it,
    # and must keep `obj` alive while using the pointer.
    cdef ptr_union pu
    if not PyString_Check(obj):
        raise TypeError('arg must be a string')
    # The C API is declared with `char **`; view the caller's
    # `unsigned char **` through the union to satisfy that declaration.
    pu.ucptr = buff
    # The call writes directly through the caller's pointers, so there is
    # nothing to copy back afterwards.  (The original ended with
    # `buff = pu.ucptr`, which only rebound the local parameter.)
    PyString_AsStringAndSize(obj, pu.ptr, length)

cdef extern from "string.h":
    # Lengths are kept as plain (unsigned) int, as in the rest of this file.
    unsigned int strlen(char * cs)
    # Declared as in the C standard library: any pointer type for the
    # destination and an int fill value.  Existing calls that pass a
    # `char *` and a `char` remain valid.
    void * memset(void * s, int c, unsigned int n)
    # Byte-wise comparison of n bytes; returns 0 when the ranges are equal.
    int memcmp(void *s, void *t, int n)

# === end of includes ===

# A trivial function that returns 1 if stra == strb, else 0
# If you want True/False, wrap bool() around the call :-)

def strequal(stra, strb):
    """Return 1 if the strings <stra> and <strb> hold the same bytes, else 0.

    Raises TypeError (from string_in) if either argument is not a string.
    """
    cdef UCP pa, pb
    cdef int lena, lenb
    # converting Python args to C locals
    string_in(stra, &pa, &lena)
    string_in(strb, &pb, &lenb)
    # Different lengths can never be equal; this also guarantees memcmp
    # below never reads past the shorter buffer.
    if lena != lenb:
        return 0
    # Compare by explicit length (not strcmp) so embedded NUL bytes are
    # handled correctly.  Uses the same memcmp that quick_search relies on
    # instead of a hand-written byte loop.
    if memcmp(pa, pb, lena) != 0:
        return 0
    return 1

# Sunday's Quick search (cut-down BM search)
# borrowed from http://www-igm.univ-mlv.fr/~lecroq/string/node19.html

"""
void preQsBc(char *x, int m, int qsBc[]) {
int i;

for (i = 0; i < ASIZE; ++i)
qsBc[i] = m + 1;
for (i = 0; i < m; ++i)
qsBc[x[i]] = m - i;
}

void QS(char *x, int m, char *y, int n) {
int j, qsBc[ASIZE];

/* Preprocessing */
preQsBc(x, m, qsBc);

/* Searching */
j = 0;
while (j <= n - m) {
if (memcmp(x, y + j, m) == 0)
OUTPUT(j);
j += qsBc[y[j + m]]; /* shift */
}
}
"""

def quick_search(pattern, text):
    """Return a list of the offsets at which <pattern> is found in <text>."""
    # Sunday's Quick Search: after each window comparison, shift by an
    # amount looked up from the byte immediately *after* the window.
    # Raises TypeError (from string_in) if either argument is not a string.
    cdef int i, j, m, n
    cdef int qsBc[256]
    cdef UCP x, y
    # converting Python args to C locals
    string_in(pattern, &x, &m)
    string_in(text, &y, &n)
    # Preprocessing: build the bad-character shift table.
    # A byte that does not occur in the pattern lets the window jump
    # completely past it (m + 1) ...
    for i from 0 <= i < 256:
        qsBc[i] = m + 1
    # ... otherwise shift so the byte lines up with its last occurrence in
    # the pattern (later i overwrites earlier i).  x and y are unsigned
    # bytes, so they index the 256-entry table directly.
    for i from 0 <= i < m:
        qsBc[x[i]] = m - i
    # Searching
    j = 0
    results = []
    rappend = results.append
    # Note judicious use of Python syntax for the results list.
    # We can't do any better by using the Python/C API,
    # which in this case is ugly, tedious and error-prone.
    while j <= n - m:
        if memcmp(x, y + j, m) == 0:
            rappend(j)
        # At the last window (j == n - m) this reads y[n], one byte past
        # the text.  NOTE(review): this relies on CPython keeping a NUL
        # terminator after string data -- confirm before reusing this
        # loop with any other kind of buffer.
        j = j + qsBc[y[j + m]]
    return results
Pyrex mailing list
Pyrex@xxxxxxxxxxxxxxxxx
http://lists.copyleft.no/mailman/listinfo/pyrex
<Prev in Thread] Current Thread [Next in Thread>
Google Custom Search

News | FAQ | advertise