Advertisement
Guest User

Untitled

a guest
Aug 23rd, 2017
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.86 KB | None | 0 0
  1. def _index_all_entries_in_index(index_name, entry_id_list=None, refresh=False):
  2.     """
  3.    Index all entries in the database into a given index
  4.    :param index_name: Index in which the records will be indexed
  5.    :param refresh: Specify if the call to ES will block until the indexation is done.
  6.    To be used only for integration tests
  7.    """
  8.  
  9.     def entry_generator(entry_id_list, zip_to_locality):
  10.         total = len(entry_id_list)
  11.         row_len = 10000
  12.         for start in range(0, total, row_len):
  13.             print('current document: %s/%s' % (start, total))
  14.             for entry in Entry.objects.filter(pk__in=entry_id_list[start:start + row_len]).select_related(
  15.                 'editentry',
  16.                 'zip_category',
  17.                 'editentry__category'
  18.             ).prefetch_related(
  19.                 'sd_entries',
  20.                 'categories',
  21.                 'contrats',
  22.                 'contrats__contrat_type',
  23.                 'zip_sub_categories',
  24.                 'editentry__sub_categories',
  25.                 'editentry__gallery_set',
  26.                 'editentry__openinghours_set',
  27.                 'editentry__socialaccount_set',
  28.                 'editentry__service_set',
  29.                 'editentry__payment_set',
  30.             ).prefetch_related(
  31.                 Prefetch(
  32.                     "contrats",
  33.                     queryset=Contrat.objects.valid(),
  34.                     to_attr="contrats_valid"
  35.                 )
  36.             ):
  37.                 yield db_entry_to_es_entry(entry, zip_to_locality)
  38.  
  39.     def bulk_actions_generator(documents):
  40.         for documents_list in documents:
  41.             for document in documents_list:
  42.                 if document:
  43.                     yield {
  44.                         '_op_type': 'index',
  45.                         '_index': index_name,
  46.                         '_id': document.meta.id,
  47.                         '_type': document._doc_type.name, '_source': document.to_dict()
  48.                     }
  49.  
  50.     if entry_id_list is None:
  51.         entry_id_list = [value[0] for value in Entry.objects.values_list('pk')]
  52.     logger.info('Starting indexing %s documents' % len(entry_id_list))
  53.  
  54.     # TODO: See whether to make a django foreign key or not. However it might
  55.     # be much faster this way
  56.     zip_to_locality = {value[0]: value[1:] for value in SdLocality.objects.values_list(*locality_values)}
  57.  
  58.     #es_client.indices.put_settings(index=index_name, body={"index": {"refresh_interval": "-1"}})
  59.  
  60.     helpers.bulk(
  61.         es_client,
  62.         bulk_actions_generator(entry_generator(entry_id_list, zip_to_locality)),
  63.         refresh=refresh,
  64.         chunk_size=settings.ELASTIC_SEARCH_BULK_SIZE,
  65.         request_timeout=settings.ELASTIC_SEARCH_BULK_TIMEOUT
  66.     )
  67.  
  68.     #es_client.indices.put_settings(index=index_name, body={"index": {"refresh_interval": "1s"}})
  69.  
  70.     return len(entry_id_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement