Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def train_classifier(training_records):
    """Return a dict containing the midpoint between the class averages
    (malignant and benign) of each attribute.
    (See the A5 writeup for a more complete description)

    For every attribute in ATTRS[1:-1], records whose 'class' is 'M' are
    averaged as malignant and all other records are averaged as benign;
    the value stored for the attribute is the midpoint of those two
    averages.

    Precondition: training_records is a list of patient record
                  dictionaries, each of which has the keys
                  in the global variable ATTRS
    Postcondition: the returned dict has midpoint values calculated
                   from the training set for all 10 attributes except
                   "ID" and "class".
    Raises: ValueError if the training set has no malignant records or
            no benign records (an average cannot be computed for an
            empty class).
    """
    # The slice skips the first and last entries of ATTRS (per the
    # postcondition above, these are "ID" and "class").
    attributes = ATTRS[1:-1]

    malignant_sums = dict.fromkeys(attributes, 0.0)
    benign_sums = dict.fromkeys(attributes, 0.0)
    malignant_count = 0
    benign_count = 0

    # Accumulate per-class attribute sums and record counts in one pass.
    for record in training_records:
        if record['class'] == 'M':
            class_sums = malignant_sums
            malignant_count += 1
        else:
            # Any class other than 'M' is counted as benign.
            class_sums = benign_sums
            benign_count += 1
        for attribute in attributes:
            class_sums[attribute] += record[attribute]

    # Fail with a clear message instead of a bare ZeroDivisionError when
    # one of the classes is missing from the training set.
    if malignant_count == 0 or benign_count == 0:
        raise ValueError(
            "training_records must contain at least one malignant and "
            "one benign record (got %d malignant, %d benign)"
            % (malignant_count, benign_count)
        )

    # Midpoint between the malignant average and the benign average.
    return {
        attribute: (
            malignant_sums[attribute] / malignant_count
            + benign_sums[attribute] / benign_count
        ) * 0.5
        for attribute in attributes
    }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement