| 1 | |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | |
|---|
| 9 | |
|---|
| 10 | |
|---|
| 11 | |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | |
|---|
| 17 | from whoosh.index import DeletionMixin |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | |
|---|
class IndexingError(Exception):
    """Error raised by index writers.

    For example, ``IndexWriter.update_document()`` raises it when none of the
    supplied fields is marked unique in the schema.
    """
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | |
|---|
| 26 | |
|---|
class IndexWriter(DeletionMixin):
    """High-level object for writing to an index.

    To get a writer for a particular index, call
    :meth:`~whoosh.index.Index.writer` on the Index object.

    >>> writer = my_index.writer()

    You can use this object as a context manager. If an exception is thrown
    from within the context it calls cancel(), otherwise it calls commit()
    when the context exits.

    The methods here read ``self.index``. This base class does not assign
    that attribute itself; presumably concrete writers set it -- confirm
    against the subclass you are using.
    """

    # Searcher cached by searcher(). Providing a class-level default means
    # searcher() and _close_reader() work even if a subclass never assigns
    # the attribute, instead of failing with AttributeError.
    _searcher = None

    def __enter__(self):
        """Enters the context; the writer itself is the context value."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Calls cancel() if the context raised, otherwise commit().

        Returns None, so an exception raised inside the context is not
        suppressed.
        """
        if exc_type:
            self.cancel()
        else:
            self.commit()

    def searcher(self, **kwargs):
        """Returns a searcher for the existing index.

        The searcher is created on first use with
        ``self.index.searcher(**kwargs)`` and cached on the writer. While a
        searcher is cached, later calls return it and ignore ``kwargs``.
        """
        if not self._searcher:
            self._searcher = self.index.searcher(**kwargs)
        return self._searcher

    def _close_reader(self):
        """Closes the searcher cached by searcher(), if any, and forgets it."""
        if self._searcher:
            self._searcher.close()
            self._searcher = None

    def delete_document(self, docnum, delete=True):
        """Deletes a document by number.

        Not implemented in this base class; always raises
        NotImplementedError.
        """
        # NOTE(review): the meaning of ``delete=False`` is not visible here
        # (presumably "undelete") -- confirm against the concrete writer.
        raise NotImplementedError

    def add_document(self, **fields):
        """Adds all the fields of a document at once. This is an alternative to calling
        start_document(), add_field() [...], end_document().

        The keyword arguments map field names to the values to index/store.

        For fields that are both indexed and stored, you can specify an alternate
        value to store using a keyword argument in the form "_stored_<fieldname>".
        For example, if you have a field named "title" and you want to index the
        text "a b c" but store the text "e f g", use keyword arguments like this::

            writer.add_document(title=u"a b c", _stored_title=u"e f g")

        Not implemented in this base class; always raises
        NotImplementedError.
        """
        raise NotImplementedError

    def update_document(self, **fields):
        """Adds or replaces a document. At least one of the fields for which you
        supply values must be marked as 'unique' in the index's schema.

        The keyword arguments map field names to the values to index/store.

        Note that this method will only replace a *committed* document; currently
        it cannot replace documents you've added to the IndexWriter but haven't yet
        committed. For example, if you do this:

        >>> writer.update_document(unique_id=u"1", content=u"Replace me")
        >>> writer.update_document(unique_id=u"1", content=u"Replacement")

        ...this will add two documents with the same value of ``unique_id``, instead of
        the second document replacing the first.

        For fields that are both indexed and stored, you can specify an alternate
        value to store using a keyword argument in the form "_stored_<fieldname>".
        For example, if you have a field named "title" and you want to index the
        text "a b c" but store the text "e f g", use keyword arguments like this::

            writer.update_document(title=u"a b c", _stored_title=u"e f g")

        Raises IndexingError if none of the supplied fields is unique in the
        schema.
        """

        # Work out which of the supplied fields the schema marks as unique.
        unique_fields = [name for name, field
                         in self.index.schema.fields()
                         if name in fields and field.unique]
        if not unique_fields:
            # list(fields) keeps the message a plain list of names on every
            # Python version (a bare keys() view prints as "dict_keys(...)").
            raise IndexingError("None of the fields in %r are unique" % list(fields))

        # Delete every document that matches any of the supplied unique
        # values. The import is local to this method, as in the original
        # code (presumably to avoid a circular import -- confirm).
        # delete_by_query() is not defined in this class; presumably it is
        # provided by DeletionMixin.
        from whoosh import query
        delquery = query.Or([query.Term(name, fields[name]) for name in unique_fields])
        delquery = delquery.normalize()
        self.delete_by_query(delquery)

        # Add the replacement document.
        self.add_document(**fields)

    def commit(self):
        """Finishes writing and unlocks the index.

        This base implementation does nothing.
        """
        pass

    def cancel(self):
        """Cancels any documents/deletions added by this object
        and unlocks the index.

        This base implementation does nothing.
        """
        pass
|---|
| 129 | |
|---|
| 130 | |
|---|
| 131 | |
|---|
| 132 | |
|---|
| 133 | |
|---|