Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _index_all_entries_in_index(index_name, entry_id_list=None, refresh=False):
    """
    Index entries from the database into a given index

    :param index_name: Index in which the records will be indexed
    :param entry_id_list: Primary keys of the entries to index. When None,
        the primary keys of all entries in the database are used.
    :param refresh: Specify if the call to ES will block until the indexation is done.
        To be used only for integration tests
    :return: Number of entry ids that were submitted for indexing
    """
    # Number of entries loaded from the database per query.
    row_len = 10000

    def entry_generator(entry_ids, zip_to_locality):
        """Yield the result of db_entry_to_es_entry for every requested entry."""
        total = len(entry_ids)
        for start in range(0, total, row_len):
            logger.info('current document: %s/%s', start, total)
            entries = Entry.objects.filter(
                pk__in=entry_ids[start:start + row_len]
            ).select_related(
                'editentry',
                'zip_category',
                'editentry__category',
            ).prefetch_related(
                'sd_entries',
                'categories',
                'contrats',
                'contrats__contrat_type',
                'zip_sub_categories',
                'editentry__sub_categories',
                'editentry__gallery_set',
                'editentry__openinghours_set',
                'editentry__socialaccount_set',
                'editentry__service_set',
                'editentry__payment_set',
            ).prefetch_related(
                # Second prefetch of "contrats", restricted by
                # Contrat.objects.valid() and stored on a separate attribute.
                Prefetch(
                    "contrats",
                    queryset=Contrat.objects.valid(),
                    to_attr="contrats_valid",
                )
            )
            for entry in entries:
                yield db_entry_to_es_entry(entry, zip_to_locality)

    def bulk_actions_generator(documents):
        """Turn the per-entry document collections into bulk 'index' actions."""
        for documents_list in documents:
            for document in documents_list:
                # Falsy documents are skipped rather than indexed.
                if not document:
                    continue
                yield {
                    '_op_type': 'index',
                    '_index': index_name,
                    '_id': document.meta.id,
                    '_type': document._doc_type.name,
                    '_source': document.to_dict(),
                }

    if entry_id_list is None:
        entry_id_list = list(Entry.objects.values_list('pk', flat=True))
    else:
        # len() and slicing are needed below, so accept any iterable of ids.
        entry_id_list = list(entry_id_list)

    logger.info('Starting indexing %s documents', len(entry_id_list))

    # TODO: See whether to make a django foreign key or not. However it might
    # be much faster this way
    zip_to_locality = {
        value[0]: value[1:]
        for value in SdLocality.objects.values_list(*locality_values)
    }

    # Disabled: turning the index refresh_interval off before the bulk load.
    # es_client.indices.put_settings(index=index_name, body={"index": {"refresh_interval": "-1"}})
    helpers.bulk(
        es_client,
        bulk_actions_generator(entry_generator(entry_id_list, zip_to_locality)),
        refresh=refresh,
        chunk_size=settings.ELASTIC_SEARCH_BULK_SIZE,
        request_timeout=settings.ELASTIC_SEARCH_BULK_TIMEOUT,
    )
    # Disabled: restoring the index refresh_interval after the bulk load.
    # es_client.indices.put_settings(index=index_name, body={"index": {"refresh_interval": "1s"}})
    return len(entry_id_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement