# Ticket #119: page_rank.py

File page_rank.py, 6.7 KB (added by aric, 8 years ago)

1 | #!/usr/bin/env python |

2 | # Copyright (C) 2004-2007 by |

3 | # Aric Hagberg <hagberg@lanl.gov> |

4 | # Dan Schult <dschult@colgate.edu> |

5 | # Pieter Swart <swart@lanl.gov> |

6 | # Distributed under the terms of the GNU Lesser General Public License |

7 | # http://www.gnu.org/copyleft/lesser.html |

8 | # NetworkX:http://networkx.lanl.gov/. |

9 | __author__ = """Aric Hagberg (hagberg@lanl.gov)""" |

10 | |

11 | import networkx as NX |

12 | from networkx.exception import NetworkXError |

13 | |

def page_rank(G,alpha=0.85,max_iter=100,tol=1.0e-4,nstart=None):
    """Return a dictionary keyed by node of the PageRank of G.

    PageRank computes the largest eigenvector of the stochastic
    adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    A starting vector for the power iteration can be given in the
    dictionary nstart (it is copied, not modified).

    Raises TypeError for graphs with multiedges and NetworkXError if
    the power iteration fails to converge within max_iter iterations.

    This is a pure Python implementation.

    """
    if hasattr(G,"multiedges"):
        if G.multiedges==True:
            raise TypeError("page_rank not valid for graphs with multiedges.")

    # create a copy in (right) stochastic form
    W=stochastic(G)

    nnodes=W.number_of_nodes()
    # choose fixed starting vector if not given
    if nstart is None:
        x=dict.fromkeys(W,1.0/nnodes)
    else:
        # copy so the caller's dictionary is not modified in place
        x=dict(nstart)
        # normalize starting vector to 1
        s=1.0/sum(x.values())
        for k in x:
            x[k]*=s

    # "dangling" nodes, no links out from them.  Test for emptiness;
    # summing the neighbor dict would add up node *keys*, which is
    # wrong (e.g. a single out-edge to node 0 sums to 0.0).
    dangle=[n for n in W if len(W.adj[n])==0] # XGraph internals exposed
    # pagerank power iteration: make up to max_iter iterations
    for i in range(max_iter):
        xlast=x
        x=dict.fromkeys(xlast.keys(),0)
        # dangling mass and teleportation are spread uniformly
        danglesum=alpha/nnodes*sum(xlast[n] for n in dangle)
        teleportsum=(1.0-alpha)/nnodes*sum(xlast.values())
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr]+=alpha*xlast[n]*W.adj[n][nbr] # XGraph internals exposed
            x[n]+=danglesum+teleportsum
        # normalize vector to 1
        s=1.0/sum(x.values())
        for n in x:
            x[n]*=s
        # check convergence, l1 norm.  Compare against nnodes*tol; the
        # original compared against n*tol where n was the leftover
        # loop variable (a node id), not the node count.
        err=sum(abs(x[n]-xlast[n]) for n in x)
        if err < nnodes*tol:
            return x

    raise NetworkXError("page_rank: power iteration failed to converge in %d iterations."%(i+1))

72 | |

73 | |

def stochastic(G,inplace=False):
    """Return a version of the weighted graph G converted to a (right)
    stochastic representation.  That is, make all of the weights for the
    neighbors of a given node sum to 1.

    If inplace=True the conversion is done in place - no copy of the graph is
    made.  This will destroy the original graph G.

    """
    # this is a hack, better handling to come in networkx-0.36
    if inplace:
        W=G # no copy is made; G itself is modified (better be an XGraph)
    else:
        # NOTE: the module imports networkx as NX; the original used the
        # unbound name "networkx" here, which raised NameError.
        if G.is_directed():
            W=NX.XDiGraph(G) # make a new XDiGraph
        else:
            W=NX.XGraph(G) # make a new XGraph
    # give unweighted (None) edges a unit weight
    for (u,v,d) in W.edges():
        if d is None:
            W.add_edge(u,v,1.0)

    # exposing graph/digraph internals here
    for n in W:
        deg=float(sum(W.adj[n].values()))
        if deg>0: # guard against zero total weight (e.g. all-zero edges)
            for p in W.adj[n]:
                W.adj[n][p]/=deg
    return W

101 | |

def google_matrix(G,alpha=0.85,nodelist=None):
    """Return the dense "Google matrix" of G: the row-stochastic
    adjacency matrix with dangling (all-zero) rows replaced by uniform
    rows, damped by alpha and mixed with uniform teleportation.

    nodelist fixes the ordering of nodes in the matrix.
    Requires numpy.
    """
    import numpy
    # to_numpy_matrix lives in the networkx namespace imported as NX;
    # the original called the bare, unbound name.
    M=NX.to_numpy_matrix(G,nodelist=nodelist)
    (n,m)=M.shape # should be square
    # add constant to dangling nodes' row
    dangling=numpy.where(M.sum(axis=1)==0)
    for d in dangling[0]:
        M[d]=1.0/n
    # normalize each row to sum to 1 (right stochastic)
    M=M/M.sum(axis=1)
    # add "teleportation"
    P=alpha*M+(1-alpha)*numpy.ones((n,n))/n
    return P

114 | |

def page_rank_exact_numpy(G,alpha=0.85,nodelist=None):
    """PageRank using numpy and call to LAPACK eig().

    Returns the eigenvector of the transposed Google matrix belonging
    to the largest eigenvalue, normalized so its entries sum to 1.
    """
    import numpy
    M=google_matrix(G,alpha,nodelist)
    e,ev=numpy.linalg.eig(M.T)
    m=e.argsort()[-1] # index of maximum eigenvalue
    # ev[:,m] is an (n,1) matrix column; the original then indexed the
    # resulting array with [:,m] again, which fails for any m>0.
    # Flatten the column instead.
    x=numpy.asarray(ev[:,m]).flatten()
    # normalize so the PageRank values sum to 1
    x=x/x.sum()
    return x

123 | |

def page_rank_numpy(G,alpha=0.85,max_iter=100,tol=1.0e-4,nodelist=None):
    """Return a numpy array of the PageRank of G.

    PageRank computes the largest eigenvector of the stochastic
    adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration stops after
    max_iter iterations or when the l1 change between iterations
    drops below n*tol.

    nodelist fixes the ordering of the nodes in the returned array.

    Raises NetworkXError if the iteration fails to converge.

    This implementation requires numpy.

    """
    import numpy
    M=google_matrix(G,alpha,nodelist)
    (n,m)=M.shape # should be square
    x=numpy.ones(n)/n # uniform starting vector
    for i in range(max_iter):
        xlast=x
        # left multiply: x^T = x^T M
        x=numpy.dot(x,M)
        # check convergence, l1 norm
        err=numpy.abs(x-xlast).sum()
        if err < n*tol:
            return numpy.asarray(x).flatten()

    raise NetworkXError("page_rank: power iteration failed to converge in %d iterations."%(i+1))

152 | |

153 | |

154 | |

def page_rank_scipy(G,alpha=0.85,max_iter=100,tol=1.0e-4,nodelist=None):
    """Return a numpy array of the PageRank of G.

    Power iteration on a scipy sparse matrix.  The iteration stops
    after max_iter iterations or when the l1 change between
    iterations drops below n*tol.

    nodelist fixes the ordering of the nodes in the returned array.

    Raises NetworkXError if the iteration fails to converge.

    This implementation requires scipy (and numpy).

    """
    import numpy
    import scipy.sparse
    # to_scipy_sparse_matrix lives in the networkx namespace imported
    # as NX; the original called the bare, unbound name.
    M=NX.to_scipy_sparse_matrix(G,nodelist=nodelist)
    (n,m)=M.shape # should be square
    S=numpy.array(M.sum(axis=1)).flatten()
    # scale each non-dangling row to sum to 1.  The original used the
    # Python 2-only "<>" operator here, a syntax error in Python 3.
    index=numpy.where(S != 0)[0]
    for i in index:
        M[i,:]*=1.0/S[i]
    x=numpy.ones(n)/n # initial guess
    # uniform distribution substituted for each dangling (all-zero) row
    dangle=numpy.array(numpy.where(M.sum(axis=1)==0,1.0/n,0)).flatten()
    for i in range(max_iter):
        xlast=x
        # rmatvec computes M^T x, i.e. the left multiply x^T M
        # (old scipy.sparse API - TODO confirm against installed scipy)
        x=alpha*(M.rmatvec(x)+numpy.dot(dangle,xlast))+(1-alpha)*xlast.sum()/n
        # check convergence, l1 norm
        err=numpy.absolute(x-xlast).sum()
        if err < n*tol:
            return x

    raise NetworkXError("page_rank: power iteration failed to converge in %d iterations."%(i+1))

188 | |

189 | |

if __name__ == "__main__":
    # Demo: compare the exact eigenvector solution against the three
    # power-iteration implementations on a small directed graph.
    # Python 2 print statements converted to the print() function.
    from networkx import *
    import numpy

    G=DiGraph()

    edges=[(1,2),(1,3),
           (3,1),(3,2),(3,5),
           (4,5),(4,6),
           (5,4),(5,6),
           (6,4)]

    G.add_edges_from(edges)

    # reference answer: dominant eigenvector of the Google matrix,
    # normalized to sum to 1
    M=google_matrix(G,alpha=0.9)
    e,ev=numpy.linalg.eig(M.T)
    p=numpy.array(ev[:,0]/ev[:,0].sum())[:,0]
    print("exact   ", p)

    pr=page_rank(G,alpha=0.9,tol=1.0e-8)
    print("networkx", list(pr.values()))

    pr_numpy=page_rank_numpy(G,alpha=0.9)
    print("numpy   ", pr_numpy)

    pr_scipy=page_rank_scipy(G,alpha=0.9)
    print("scipy   ", pr_scipy)