Changeset 415
- Timestamp:
- 02/03/2010 06:45:12 PM (7 months ago)
- Location:
- projects/whoosh/trunk
- Files:
-
- 7 modified
-
src/whoosh/fields.py (modified) (8 diffs)
-
src/whoosh/qparser/default.py (modified) (3 diffs)
-
src/whoosh/qparser/simple.py (modified) (2 diffs)
-
src/whoosh/util.py (modified) (2 diffs)
-
tests/test_highlighting.py (modified) (1 diff)
-
tests/test_misc.py (modified) (4 diffs)
-
tests/test_writing.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
projects/whoosh/trunk/src/whoosh/fields.py
r407 r415 19 19 """ 20 20 21 import datetime 22 import re 21 import datetime, re, struct 23 22 24 23 from whoosh.analysis import IDAnalyzer, RegexAnalyzer, KeywordAnalyzer … … 32 31 class UnknownFieldError(Exception): 33 32 pass 34 35 33 36 34 … … 68 66 69 67 format = vector = scorable = stored = unique = None 68 parse_query = None 70 69 indexed = True 71 70 __inittypes__ = dict(format=Format, vector=Format, … … 108 107 if not self.format: 109 108 raise Exception("%s field cannot index without a format" % self.__class__) 110 111 109 if not isinstance(value, unicode): 112 110 raise ValueError("%r is not unicode" % value) 113 114 111 return self.format.word_values(value, mode="index", **kwargs) 112 113 def process_text(self, qstring, mode='', **kwargs): 114 if not self.format: 115 raise Exception("%s field has no format" % self) 116 return (t.text for t in self.format.analyze(qstring, mode=mode, **kwargs)) 115 117 116 118 … … 123 125 __inittypes__ = dict(stored=bool, unique=bool, field_boost=float) 124 126 125 def __init__(self, stored = False, unique = False, field_boost =1.0):127 def __init__(self, stored=False, unique=False, field_boost=1.0): 126 128 """ 127 129 :param stored: Whether the value of this field is stored with the document. 128 130 """ 129 self.format = Existence(analyzer = IDAnalyzer(), field_boost= field_boost)131 self.format = Existence(analyzer=IDAnalyzer(), field_boost= field_boost) 130 132 self.stored = stored 131 133 self.unique = unique … … 139 141 __inittypes__ = dict(stored=bool, unique=bool, expression=bool, field_boost=float) 140 142 141 def __init__(self, stored = False, unique = False, expression = None, field_boost =1.0):143 def __init__(self, stored=False, unique=False, expression=None, field_boost=1.0): 142 144 """ 143 145 :param stored: Whether the value of this field is stored with the document. 
… … 155 157 156 158 159 class NUMERIC(FieldType): 160 def __init__(self, type=int, stored=False, unique=False, field_boost=1.0): 161 self.type = type 162 self.stored = stored 163 self.unique = unique 164 self.format = Existence(analyzer=IDAnalyzer(), field_boost= field_boost) 165 166 def index(self, num): 167 method = getattr(self, self.type.__name__ + "_to_text") 168 return [(method(num), 1, '')] 169 170 def to_text(self, x): 171 ntype = self.type 172 method = getattr(self, ntype.__name__ + "_to_text") 173 return method(ntype(x)) 174 175 def process_text(self, text, **kwargs): 176 return (self.to_text(text), ) 177 178 def parse_query(self, fieldname, qstring, boost=1.0): 179 from whoosh import query 180 return query.Term(fieldname, self.to_text(qstring), boost=boost) 181 182 @staticmethod 183 def int_to_text(x): 184 x += (1<<(4<<2))-1 # 4 means 32-bits 185 return u"%08x" % x 186 187 @staticmethod 188 def text_to_int(text): 189 x = int(text, 16) 190 x -= (1<<(4<<2))-1 191 return x 192 193 @staticmethod 194 def long_to_text(x): 195 x += (1<<(8<<2))-1 196 return u"%016x" % x 197 198 @staticmethod 199 def text_to_long(text): 200 x = long(text, 16) 201 x -= (1<<(8<<2))-1 202 return x 203 204 @staticmethod 205 def float_to_text(x): 206 x = struct.unpack("<q", struct.pack("<d", x))[0] 207 x += (1<<(8<<2))-1 208 return u"%016x" % x 209 210 @staticmethod 211 def text_to_float(text): 212 x = long(text, 16) 213 x -= (1<<(8<<2))-1 214 x = struct.unpack("<d", struct.pack("<q", x))[0] 215 return x 216 217 157 218 class DATETIME(FieldType): 158 219 __inittypes__ = dict(stored=bool, unique=bool) 159 220 160 def __init__(self, stored = True, unique =False):221 def __init__(self, stored=False, unique=False): 161 222 """ 162 223 :param stored: Whether the value of this field is stored with the document. 
… … 172 233 raise ValueError("Value of DATETIME field must be a datetime object: %r" % dt) 173 234 174 text = dt.isoformat() 235 text = dt.isoformat() # 2010-02-02T17:06:19.109000 175 236 text = text.replace(" ", "").replace(":", "").replace("-", "").replace(".", "") 176 237 return [(text, 1, '')] 238 239 def process_text(self, text, **kwargs): 240 text = text.replace(" ", "").replace(":", "").replace("-", "").replace(".", "") 241 return (text, ) 242 243 def parse_query(self, fieldname, qstring, boost=1.0): 244 text = self.process_text(qstring) 245 from whoosh import query 246 return query.Prefix(fieldname, text, boost=boost) 247 248 249 class BOOLEAN(FieldType): 250 strings = (u"t", u"f") 251 trues = frozenset((u"t", u"true", u"yes", u"1")) 252 falses = frozenset((u"f", u"false", u"no", u"0")) 253 254 __inittypes__ = dict(stored=bool) 255 256 def __init__(self, stored=False): 257 self.stored = stored 258 self.format = Existence() 259 260 def index(self, bit): 261 if not isinstance(bit, bool): 262 raise ValueError("Value of BOOL field must be a bool object: %r" % bit) 263 return [(self.strings[int(bit)], 1, '')] 264 265 def parse_query(self, fieldname, qstring, boost=1.0): 266 from whoosh import query 267 text = None 268 if qstring in self.falses: 269 text = self.strings[0] 270 elif qstring in self.trues: 271 text = self.strings[1] 272 273 if text is None: 274 return query.NullQuery 275 return query.Term(fieldname, text, boost=boost) 177 276 178 277 -
projects/whoosh/trunk/src/whoosh/qparser/default.py
r404 r415 182 182 183 183 def get_term_text(self, field, text, **kwargs): 184 if not field.format:185 raise Exception("%s field has no format" % field)186 187 184 # Just take the first token 188 for t oken in field.format.analyze(text, mode="query", **kwargs):189 return t oken.text185 for t in field.process_text(text, mode="query", **kwargs): 186 return t 190 187 191 188 def make_term(self, fieldname, text): 192 189 field = self._field(fieldname) 193 190 if field: 194 text = self.get_term_text(field, text) 195 if not text: 191 if field.parse_query: 192 return field.parse_query(fieldname, text) 193 else: 194 text = self.get_term_text(field, text) 195 196 if text is None: 196 197 return NullQuery 197 198 return self.termclass(fieldname, text) … … 200 201 field = self._field(fieldname) 201 202 if field: 202 texts = [t.text for t in field.format.analyze(text, mode="query")] 203 if field.parse_query: 204 return field.parse_query(fieldname, text) 205 206 texts = list(field.process_text(text, mode="query")) 203 207 if not texts: 204 208 return self.termclass(fieldname, u'') … … 213 217 field = self._field(fieldname) 214 218 if field: 215 ptext = self.get_term_text(field, text, tokenize=False, removestops=False) 216 if ptext: text = ptext 219 text = self.get_term_text(field, text, tokenize=False, removestops=False) 217 220 return Wildcard(fieldname, text) 218 221 -
projects/whoosh/trunk/src/whoosh/qparser/simple.py
r404 r415 88 88 if self.schema: 89 89 field = self.schema[fieldname] 90 if not field.format: 91 raise Exception("%s field has no format" % field) 92 return [token.text for token in field.format.analyze(text, mode="query", **kwargs)] 90 return list(field.process_text(text, mode="query", **kwargs)) 93 91 else: 94 92 return [text] 95 93 96 94 def make_basic_clause(self, fieldname, text, boost=1.0): 95 if self.schema: 96 field = self.schema[fieldname] 97 if field.parse_query: 98 return field.parse_query(fieldname, text, boost=boost) 99 97 100 parts = self.get_term_text(fieldname, text) 98 101 if len(parts) > 1: … … 100 103 else: 101 104 return self.termclass(fieldname, parts[0], boost=boost) 102 105 103 106 def make_clause(self, text, boost=1.0): 104 107 return self.make_basic_clause(self.fieldname, text, boost=boost) -
projects/whoosh/trunk/src/whoosh/util.py
r406 r415 19 19 """ 20 20 21 from array import array 21 22 import codecs, re 22 23 … … 164 165 yield chr(i) + w[i:].encode("utf8") 165 166 last = w 166 167 168 169 def to_7bit(x, islong): 170 if not islong: 171 shift = 31 172 nchars = 5 173 else: 174 shift = 62 175 nchars = 10 176 177 buffer = array("c", "\x00" * nchars) 178 x += (1<<shift)-1 179 while x: 180 buffer[nchars-1] = chr(x & 0x7f) 181 x >>= 7 182 nchars -= 1 183 return buffer.tostring() 184 185 def from_7bit(text): 186 if len(text) == 5: 187 shift = 31 188 elif len(text) == 10: 189 shift = 62 190 else: 191 raise ValueError("text is not 5 or 10 bytes") 192 193 x = 0 194 for char in text: 195 x <<= 7 196 char = ord(char) 197 if char > 0x7f: 198 raise Exception 199 x |= char 200 x -= (1<<shift)-1 201 return x 167 202 168 203 def prefix_decode_all(ls): -
projects/whoosh/trunk/tests/test_highlighting.py
r400 r415
 58  58
 59  59
 60     -
 61  60   if __name__ == '__main__':
 62  61       unittest.main()
-
projects/whoosh/trunk/tests/test_misc.py
r412 r415 4 4 5 5 from whoosh.filedb.filestore import FileStorage 6 from whoosh.support.filelock import try_for 6 7 7 8 … … 13 14 def destroy_dir(self, name): 14 15 try: 15 os.rmdir("test _index")16 os.rmdir("testindex") 16 17 except: 17 18 pass … … 19 20 def clean_file(self, path): 20 21 if os.path.exists(path): 21 os.remove(path) 22 try: 23 os.remove(path) 24 except: 25 pass 22 26 23 27 def test_filelock_simple(self): 24 self.make_dir("test _index")25 st = FileStorage("test _index")28 self.make_dir("testindex") 29 st = FileStorage("testindex") 26 30 lock1 = st.lock("testlock") 27 31 lock2 = st.lock("testlock") … … 34 38 lock2.release() 35 39 36 self.clean_file("test _index/testlock")37 self.destroy_dir("test _index")40 self.clean_file("testindex/testlock") 41 self.destroy_dir("testindex") 38 42 39 43 def test_threaded_filelock(self): 40 self.make_dir("test _index")41 st = FileStorage("test _index")44 self.make_dir("testindex") 45 st = FileStorage("testindex") 42 46 lock1 = st.lock("testlock") 43 47 result = [] 44 48 49 # The thread function tries to acquire the lock and 50 # then quits 45 51 def fn(): 46 52 lock2 = st.lock("testlock") 47 lock2.acquire(blocking=True)48 result.append(True)49 lock2.release()50 53 gotit = try_for(lock2.acquire, 1.0, 0.1) 54 if gotit: 55 result.append(True) 56 lock2.release() 51 57 t = threading.Thread(target=fn) 58 59 # Acquire the lock in this thread 52 60 lock1.acquire() 61 # Start the other thread trying to acquire the lock 53 62 t.start() 54 time.sleep(0.1) 63 # Wait for a bit 64 time.sleep(0.15) 65 # Release the lock 55 66 lock1.release() 56 del lock1 57 time.sleep(0.1) 67 # Wait for the other thread to finish 68 t.join() 69 # If the other thread got the lock, it should have 70 # appended something to the "results" list. 58 71 self.assertEqual(len(result), 1) 59 72 60 self.clean_file("test _index/testlock")61 self.destroy_dir("test _index")73 self.clean_file("testindex/testlock") 74 self.destroy_dir("testindex") 62 75 63 76 -
projects/whoosh/trunk/tests/test_writing.py
r413 r415
 13  13       def destroy_dir(self, name):
 14  14           try:
 15     -             os.rmdir("test_index")
     15 +             os.rmdir("testindex")
 16  16           except:
 17  17               pass
  …   …
 22  22
 23  23       def test_asyncwriter(self):
 24     -         self.make_dir("test_index")
     24 +         self.make_dir("testindex")
 25  25           schema = fields.Schema(id=fields.ID, text=fields.TEXT)
 26     -         ix = index.create_in("test_index", schema)
     26 +         ix = index.create_in("testindex", schema)
 27  27
 28  28           domain = (u"alfa", u"bravo", u"charlie", u"delta", u"echo", u"foxtrot", u"golf", u"hotel", u"india")
