Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import numpy as np
- import sys
- def main():
- t0 = time.time()
- vlans, requests = load_files()
- total = len(requests)
- no_redundancy = len(requests[requests[:,1] == 0])
- with_redundancy = total - no_redundancy
- output_rows = no_redundancy + with_redundancy * 2
- # allocate enough memory big enough for the output data, once.
- out = [np.zeros(output_rows, dtype=int) for i in range(4)]
- # sort the vlan info first by vlan id and then by device id, since the target
- # is to always use the lowest available of both, in that order.
- # mergesort used because it is the only stable one, according to the doc,
- # which is important when sorting by multiple fields
- # 'view' is used to 1. not copy the data, but instead simply have a different representation
- # 2. add 'fN' fields to easily sort by in a specified order
- by_vlan_id = np.sort(vlans.view('int, int, int'), kind='mergesort', order=['f2', 'f0'], axis=0)
- # The idea is to only traverse the vlan array once, thus we keep track of the index
- # to start looking for a free vlan/device for a request at.
- no_red_start = 0
- with_red_start = 0
- # output line index
- idx = 0
- t1 = time.time()
- for req in requests:
- if req[1]:
- # find the next closest available primary port
- dev_id, vlan_id, with_red_start = next_with_redundancy(by_vlan_id, with_red_start)
- out[0][idx:idx+2] = req[0]
- out[1][idx:idx+2] = dev_id
- out[3][idx:idx+2] = vlan_id
- out[2][idx] = 0
- out[2][idx+1] = 1
- idx += 2
- else:
- # find the next closest available primary/secondary port pair.
- dev_id, vlan_id, no_red_start = next_no_redundancy(by_vlan_id, no_red_start)
- out[0][idx] = req[0]
- out[1][idx] = dev_id
- out[2][idx] = 1
- out[3][idx] = vlan_id
- idx += 1
- t2 = time.time()
- # convert output list to numpy array (four rows, may columns) and transpose (four columns, may rows)
- # to write in the correct format
- np.savetxt('output_np2.csv', np.array(out).transpose(), fmt="%d", delimiter=",", newline="\n", header='request_id,device_id,primary_port,vlan_id', comments="")
- t3 = time.time()
- print("%.2fsec: reading: %dms, processing: %dms, writing: %dms" % (
- t3 - t0, (t1 - t0) * 1000, (t2 - t1) * 1000, (t3 - t2) * 1000))
- def next_no_redundancy(vlan_info, start):
- # Requests with no redundancy are straightforward:
- # Starting from where we left off previously, find the first
- # available vlan_id attached to a primary port.
- # Due to earlier sort, it will be the lowest vlan_id
- # and device_id, no extra work needed.
- for idx, info in enumerate(vlan_info[start:]):
- # Because of the 'view' of an original array made earlier for sorting by field,
- # each row has been wrapped into an ndarray, so we unwrap it here to get to the
- # data values
- info = info[0]
- # vlan_id cannot be zero. If it is, the data row has been cleared, implying
- # the "reserved" status, skip.
- if not info[2]:
- continue
- if info[1]:
- # store the ids
- dev_id = info[0]
- vlan_id = info[2]
- # mark the device port as used. simply null the whole data row, hence the bytes literal
- # this is needed because no_red_start and with_red_start are independent and would
- # otherwise reserve the same port.
- vlan_info[start + idx] = b'\x00'
- # return the ids and the index to start searching from for the next request
- return dev_id, vlan_id, start + idx + 1
- raise ValueError("No free devices for a no-redundancy request! Need more moneyz!")
- def next_with_redundancy(vlan_info, start):
- # init to None. Could be init'ed to vlan_info[start] data,
- # but then the 'is reserved' checks would need to be duplicated here.
- # That's ugly, so no.
- dev_id, vlan_id, primary, secondary = [None, None, None, None
- for idx, info in enumerate(vlan_info[start:]):
- info = info[0]
- if not info[2]:
- continue
- # make sure that both primary and secondary have the same vlan_id on the same device.
- # 'dev_id is None' is actually redundant (info[0] is never None, so the second condition
- # would also be True), but for this particular data set this yields a performance
- # increase: most of the time a device is found within a few elements, but
- # 'is None' is much quicker than access to info[0] for the comparison, so removing
- # one info[0] access from the 3-5 checks that are made saves a lot more than
- # extra 'is None' check the other 2-4 times (dev_id can be (and always is) None
- # only during the first iteration.
- if dev_id is None or info[0] != dev_id or info[2] != vlan_id:
- dev_id = info[0]
- vlan_id = info[2]
- # if dev_id and/or vlan_id have changed, reset the previously prepared ports,
- # because we need a pair with the same dev/vlan id
- primary, secondary = None, None
- if info[1]:
- # this serve double purpose:
- # 1. stores the row index to be nullified later to be marked as "reserved"
- # 2. flags that the current vlan/dev id pair's primary port is up for
- # reservation
- primary = start + idx
- else:
- # same as above, for the secondary port
- secondary = start + idx
- # if we have found a vlan/dev pair with both port available, reserve them and return
- if primary is not None and secondary is not None:
- # nullify the "reserved" vlan ids on a device
- vlan_info[primary] = b'\x00'
- vlan_info[secondary] = b'\x00'
- # same as in no-redundancy
- return dev_id, vlan_id, start + idx + 1
- raise ValueError("No free devices for a request with redundancy! Need funding!")
- def load_files():
- vlans = np.loadtxt(open('vlans.csv', 'rb'), delimiter=",", skiprows=1, dtype=int)
- requests = np.loadtxt(open('requests.csv', 'rb'), delimiter=",", skiprows=1, dtype=int)
- return vlans, requests
- if __name__ == "__main__":
- main()
Add Comment
Please, Sign In to add comment