emwnenmf_comp.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. import numpy as np
  2. import time
  3. def emwnenmf_comp(data, G, F, r, Tmax):
  4. OutIter = 50
  5. tol = 1e-5
  6. delta_measure = 1
  7. em_iter_max = round(Tmax / delta_measure) + 1 #
  8. T = np.empty(shape=(em_iter_max + 1))
  9. T.fill(np.nan)
  10. RMSE = np.empty(shape=(2, em_iter_max + 1))
  11. RMSE.fill(np.nan)
  12. ITER_MAX = 200 # maximum inner iteration number (Default)
  13. ITER_MIN = 10 # minimum inner iteration number (Default)
  14. np.put(F, data.idxOF, data.sparsePhi_F)
  15. np.put(G, data.idxOG, data.sparsePhi_G)
  16. Xcomp = data.X + np.multiply(data.nW, np.dot(G, F))
  17. FXt = np.dot(F, Xcomp.T)
  18. FFt = np.dot(F, F.T)
  19. GtX = np.dot(G.T, Xcomp)
  20. GtG = np.dot(G.T, G)
  21. GradG = np.dot(G, FFt) - FXt.T
  22. GradF = np.dot(GtG, F) - GtX
  23. init_delta = stop_rule(np.hstack((G.T, F)), np.hstack((GradG.T, GradF)))
  24. tolF = tol * init_delta
  25. tolG = tolF # Stopping tolerance
  26. # Iterative updating
  27. G = G.T
  28. k = 0
  29. RMSE[:, k] = np.linalg.norm(F[:, 0:-1] - data.F[:, 0:-1], 2, axis=1) / np.sqrt(F.shape[1] - 1)
  30. T[k] = 0
  31. t = time.time()
  32. to_comp = False
  33. trigger_comp = 1e-5
  34. # Main loop
  35. while time.time() - t <= Tmax + delta_measure:
  36. # Estimation step
  37. Xcomp = data.X + np.multiply(data.nW, np.dot(G.T, F))
  38. if to_comp:
  39. # Compress left and right
  40. L, R = RSI_compression(Xcomp, r)
  41. X_L = np.dot(L, Xcomp)
  42. X_R = np.dot(Xcomp, R)
  43. # Maximisation step
  44. for _ in range(OutIter):
  45. # Optimize F with fixed G
  46. np.put(F, data.idxOF, 0)
  47. F, iterF, _ = NNLS(F, GtG, GtX - GtG.dot(data.Phi_F), ITER_MIN, ITER_MAX, tolF, data.idxOF, False)
  48. np.put(F, data.idxOF, data.sparsePhi_F)
  49. if iterF <= ITER_MIN:
  50. tolF = tolF / 10
  51. # print('Tweaked F tolerance to '+str(tolF))
  52. if to_comp:
  53. F_comp = F.dot(R)
  54. FFt = np.dot(F_comp, F_comp.T)
  55. FXt = np.dot(F_comp, X_R.T)
  56. else:
  57. FFt = np.dot(F, F.T)
  58. FXt = np.dot(F, Xcomp.T)
  59. # Optimize G with fixed F
  60. np.put(G.T, data.idxOG, 0)
  61. G, iterG, _ = NNLS(G, FFt, FXt - FFt.dot(data.Phi_G.T), ITER_MIN, ITER_MAX, tolG, data.idxOG, True)
  62. np.put(G.T, data.idxOG, data.sparsePhi_G)
  63. if iterG <= ITER_MIN:
  64. tolG = tolG / 10
  65. # print('Tweaked G tolerance to '+str(tolG))
  66. if to_comp:
  67. G_comp = G.dot(L.T)
  68. GtG = np.dot(G_comp, G_comp.T)
  69. GtX = np.dot(G_comp, X_L)
  70. else:
  71. GtG = np.dot(G, G.T)
  72. GtX = np.dot(G, Xcomp)
  73. if time.time() - t - k * delta_measure >= delta_measure:
  74. k = k + 1
  75. if k >= em_iter_max + 1:
  76. break
  77. RMSE[:, k] = np.linalg.norm(F[:, 0:-1] - data.F[:, 0:-1], 2, axis=1) / np.sqrt(F.shape[1] - 1)
  78. T[k] = time.time() - t
  79. # if k%100==0:
  80. # print(str(k)+' '+str(RMSE[0,k])+' '+str(RMSE[1,k]))
  81. if not to_comp:
  82. if nmf_norm_fro(Xcomp, G.T, F) > trigger_comp:
  83. break
  84. else:
  85. to_comp = True
  86. print(time.time()-t)
  87. break
  88. return {'RMSE': RMSE, 'T': T}
  89. def stop_rule(X, GradX):
  90. # Stopping Criterions
  91. pGrad = GradX[np.any(np.dstack((X > 0, GradX < 0)), 2)]
  92. return np.linalg.norm(pGrad, 2)
  93. def NNLS(Z, GtG, GtX, iterMin, iterMax, tol, idxfixed, transposed):
  94. L = np.linalg.norm(GtG, 2) # Lipschitz constant
  95. H = Z # Initialization
  96. Grad = np.dot(GtG, Z) - GtX # Gradient
  97. alpha1 = 1
  98. for iter in range(1, iterMax + 1):
  99. H0 = H
  100. H = np.maximum(Z - Grad / L, 0) # Calculate squence 'Y'
  101. if transposed: # If Z = G.T
  102. np.put(H.T, idxfixed, 0)
  103. else: # If Z = F
  104. np.put(H, idxfixed, 0)
  105. alpha2 = 0.5 * (1 + np.sqrt(1 + 4 * alpha1 ** 2))
  106. Z = H + ((alpha1 - 1) / alpha2) * (H - H0)
  107. alpha1 = alpha2
  108. Grad = np.dot(GtG, Z) - GtX
  109. # Stopping criteria
  110. if iter >= iterMin:
  111. # Lin's stopping criteria
  112. pgn = stop_rule(Z, Grad)
  113. if pgn <= tol:
  114. break
  115. return H, iter, Grad
  116. def nmf_norm_fro(X, G, F, *args):
  117. W = args
  118. if len(W) == 0:
  119. f = np.square(np.linalg.norm(X - np.dot(G, F), 'fro')) / np.square(np.linalg.norm(X, 'fro'))
  120. else:
  121. W = W[0]
  122. f = np.square(np.linalg.norm(X - np.multiply(W, np.dot(G, F)), 'fro')) / np.square(np.linalg.norm(X, 'fro'))
  123. return f
  124. def RSI_compression(X, r):
  125. compressionLevel = 20
  126. m, n = X.shape
  127. l = min(n, max(compressionLevel, r + 10))
  128. OmegaL = np.random.randn(n, l)
  129. Y = np.dot(X, OmegaL)
  130. for _ in range(4):
  131. Y = np.linalg.qr(Y, mode='reduced')[0]
  132. S = np.dot(X.T, Y)
  133. Z = np.linalg.qr(S, mode='reduced')[0]
  134. Y = np.dot(X, Z)
  135. L = np.linalg.qr(Y, mode='reduced')[0].T
  136. OmegaR = np.random.randn(l, m)
  137. Y = np.dot(OmegaR, X)
  138. for _ in range(4):
  139. Y = np.linalg.qr(Y.T, mode='reduced')[0]
  140. S = np.dot(X, Y)
  141. Z = np.linalg.qr(S, mode='reduced')[0]
  142. Y = np.dot(Z.T, X)
  143. R = np.linalg.qr(Y.T, mode='reduced')[0]
  144. return L, R