# pla_np.py (2.1 KB)
  1. #!/usr/bin/env python3
  2. import numpy as np
  3. c = 0.1
  4. LIMIT = 100
  5. print_epoches = [5, 10, 100, 1000]
  6. DATA_SLICE = 5
  7. NUM_CLASS = 4
  8. INPUT_FEATURE = 6
  9. def load():
  10. raw_data = np.loadtxt('car.data', delimiter=',')
  11. data_size = len(raw_data)
  12. np.random.shuffle(raw_data)
  13. data = np.array_split(raw_data, DATA_SLICE)
  14. ret = [np.split(i, [1,], axis=1) for i in data]
  15. return ret
  16. data_pieces = load()
  17. val_idx = 0
  18. train_label = np.concatenate(
  19. [data_pieces[i][0] for i in list(range(DATA_SLICE))[:val_idx] + list(range(DATA_SLICE))[val_idx+1:]]
  20. )
  21. train_label = train_label - 1
  22. train_label = train_label.flatten().astype(int)
  23. train_data = np.concatenate(
  24. [data_pieces[i][1] for i in list(range(DATA_SLICE))[:val_idx] + list(range(DATA_SLICE))[val_idx+1:]]
  25. )
  26. val_label = data_pieces[val_idx][0]
  27. val_data = data_pieces[val_idx][1]
  28. val_label = val_label - 1
  29. val_label = val_label.flatten().astype(int)
  30. v = np.array(val_data, dtype='float32')
  31. v = np.concatenate((v, np.ones((len(v), 1))), axis=1)
  32. def pla(train_data, train_label, val_data, val_label, picked_class):
  33. x = np.array(train_data, dtype='float32')
  34. x = np.concatenate((x, np.ones((len(x), 1))), axis=1)
  35. for i in range(len(x)):
  36. if train_label[i] != picked_class:
  37. x[i] *= -1
  38. w = np.random.rand(INPUT_FEATURE + 1)
  39. for j in range(LIMIT):
  40. flag = False
  41. for i in x:
  42. z = sum(i * w)
  43. if z <= 0:
  44. w = w + c * i
  45. flag = True
  46. if not flag:
  47. print('')
  48. break
  49. #if j in print_epoches:
  50. print(j)
  51. print('train acc:', np.sum(np.sum(x * w, axis=1) > 0) / len(x))
  52. print('val acc:', np.sum((np.sum(v * w, axis=1) > 0) == (val_label == picked_class)) / len(v))
  53. return w
  54. w = [pla(train_data, train_label, val_data, val_label, i) for i in range(NUM_CLASS)]
  55. pred = [np.sum(v * w[i], axis=1) > 0 for i in range(NUM_CLASS)]
  56. print('val acc:', sum([all([pred[j][i] == (j == val_label[i]) for j in range(NUM_CLASS)]) for i in range(len(val_label))]) / len(val_label))