
Ticket #119: page_rank.py

File page_rank.py, 6.7 KB (added by aric, 7 years ago)
#!/usr/bin/env python
#    Copyright (C) 2004-2007 by
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
#    Distributed under the terms of the GNU Lesser General Public License
#    http://www.gnu.org/copyleft/lesser.html
#    NetworkX: http://networkx.lanl.gov/
__author__ = """Aric Hagberg (hagberg@lanl.gov)"""

import networkx as NX
from networkx.exception import NetworkXError

def page_rank(G,alpha=0.85,max_iter=100,tol=1.0e-4,nstart=None):
    """Return a dictionary keyed by node of the PageRank of G.

    PageRank computes the principal eigenvector of the stochastic
    adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or when an error tolerance of
    number_of_nodes(G)*tol has been reached.

    A starting vector for the power iteration can be given in the
    dictionary nstart.

    This is a pure Python implementation.

    """
    if hasattr(G,"multiedges") and G.multiedges:
        raise TypeError("page_rank not valid for graphs with multiedges.")

    # create a copy in (right) stochastic form
    W=stochastic(G)

    # choose fixed starting vector if not given
    if nstart is None:
        x=dict.fromkeys(W,1.0/W.number_of_nodes())
    else:
        x=nstart
        # normalize starting vector to 1
        s=1.0/sum(x.values())
        for k in x: x[k]*=s

    nnodes=W.number_of_nodes()
    # "dangling" nodes, no links out from them
    dangle=[n for n in W if sum(W.adj[n].values())==0.0]  # XGraph internals exposed; sum weights, not keys
    # pagerank power iteration: make up to max_iter iterations
    for i in range(max_iter):
        xlast=x
        x=dict.fromkeys(xlast.keys(),0)
        danglesum=alpha/nnodes*sum(xlast[n] for n in dangle)
        teleportsum=(1.0-alpha)/nnodes*sum(xlast.values())
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr]+=alpha*xlast[n]*W.adj[n][nbr] # XGraph internals exposed
            x[n]+=danglesum+teleportsum
        # normalize vector to 1
        s=1.0/sum(x.values())
        for n in x: x[n]*=s
        # check convergence, l1 norm
        err=sum([abs(x[n]-xlast[n]) for n in x])
        if err < nnodes*tol:  # nnodes, not the leftover loop variable n
            return x

    raise NetworkXError("page_rank: power iteration failed to converge in %d iterations."%(i+1))


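# The loop above implements the power-method update
#
#     x_{k+1}^T = alpha*x_k^T*W + alpha*d_k/n + (1-alpha)/n
#
# where d_k is the total rank sitting on dangling nodes at step k, so
# both the dangling mass and the "teleportation" mass are spread
# uniformly over all n nodes.  A minimal usage sketch (not part of the
# original file), assuming the 0.3x-era NetworkX API used here:
#
#     >>> G=NX.DiGraph()
#     >>> G.add_edges_from([(1,2),(2,3),(3,1)])
#     >>> page_rank(G)   # a 3-cycle: every node gets rank 1/3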
def stochastic(G,inplace=False):
    """Return a version of the weighted graph G converted to a (right)
    stochastic representation.  That is, make all of the weights for the
    neighbors of a given node sum to 1.

    If inplace=True the conversion is done in place - no copy of the
    graph is made and the edge weights of the original G are overwritten.

    """
    # this is a hack, better handling to come in networkx-0.36
    if inplace:
        W=G # no copy; G had better be an XGraph or XDiGraph
    else:
        if G.is_directed():
            W=NX.XDiGraph(G) # make a new XDiGraph
        else:
            W=NX.XGraph(G) # make a new XGraph
    # give unweighted edges a default weight of 1.0
    for (u,v,d) in W.edges():
        if d is None:
            W.add_edge(u,v,1.0)
    # exposing graph/digraph internals here
    for n in W:
        deg=float(sum(W.adj[n].values()))
        for p in W.adj[n]:
            W.adj[n][p]/=deg
    return W

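# A worked example of stochastic(), as comments (weights are
# hypothetical): if node u has out-edges {v: 2.0, w: 6.0}, the converted
# graph stores {v: 0.25, w: 0.75}, so every non-dangling row of the
# adjacency structure sums to 1; dangling nodes keep an empty row.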
def google_matrix(G,alpha=0.85,nodelist=None):
    """Return the Google matrix of G as a dense numpy matrix."""
    import numpy
    M=NX.to_numpy_matrix(G,nodelist=nodelist)
    (n,m)=M.shape # should be square
    # add constant to dangling nodes' rows
    dangling=numpy.where(M.sum(axis=1)==0)
    for d in dangling[0]:
        M[d]=1.0/n
    # normalize rows so they sum to 1
    M=M/M.sum(axis=1)
    # add "teleportation"
    P=alpha*M+(1-alpha)*numpy.ones((n,n))/n
    return P

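# google_matrix builds
#
#     P = alpha*M + (1-alpha)*J/n
#
# where M is the row-stochastic adjacency matrix with each dangling row
# replaced by 1/n and J is the all-ones matrix.  P is row stochastic, so
# its largest eigenvalue is 1 and PageRank is the matching left
# eigenvector.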
def page_rank_exact_numpy(G,alpha=0.85,nodelist=None):
    """PageRank using numpy and a call to LAPACK eig()."""
    import numpy
    M=google_matrix(G,alpha,nodelist)
    e,ev=numpy.linalg.eig(M.T)
    m=e.argsort()[-1] # index of maximum eigenvalue (=1)
    x=numpy.array(ev[:,m]).flatten() # original indexed [:,m] twice, an IndexError for m>0
    # normalize to a probability distribution
    return x/x.sum()

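# eig() is applied to M.T because PageRank is a *left* eigenvector of
# the Google matrix: x^T P = x^T is the same equation as P^T x = x.
# eig() returns eigenvalues in no particular order, hence the argsort
# to locate the eigenvalue 1.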
def page_rank_numpy(G,alpha=0.85,max_iter=100,tol=1.0e-4,nodelist=None):
    """Return a numpy array of the PageRank of G.

    PageRank computes the principal eigenvector of the stochastic
    adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or when an error tolerance of
    number_of_nodes(G)*tol has been reached.

    This implementation requires numpy.

    """
    import numpy
    M=google_matrix(G,alpha,nodelist)
    (n,m)=M.shape # should be square
    x=numpy.ones((n))/n # initial guess: uniform distribution
    for i in range(max_iter):
        xlast=x
        x=numpy.dot(x,M) # left multiply: x^T <- x^T P
        # check convergence, l1 norm
        err=numpy.abs(x-xlast).sum()
        if err < n*tol:
            return numpy.asarray(x).flatten()

    raise NetworkXError("page_rank_numpy: power iteration failed to converge in %d iterations."%(i+1))


def page_rank_scipy(G,alpha=0.85,max_iter=100,tol=1.0e-4,nodelist=None):
    """Return a numpy array of the PageRank of G.

    PageRank computes the principal eigenvector of the stochastic
    adjacency matrix of G.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or when an error tolerance of
    number_of_nodes(G)*tol has been reached.

    This implementation requires scipy.

    """
    import scipy.sparse
    M=NX.to_scipy_sparse_matrix(G,nodelist=nodelist)
    (n,m)=M.shape # should be square
    S=scipy.array(M.sum(axis=1)).flatten()
    index=scipy.where(S!=0)[0] # rows with outgoing links
    for i in index:
        M[i,:]*=1.0/S[i] # make those rows stochastic
    x=scipy.ones((n))/n # initial guess (the original line was missing the n)
    dangle=scipy.array(scipy.where(M.sum(axis=1)==0,1.0/n,0)).flatten()
    for i in range(max_iter):
        xlast=x
        x=alpha*(M.rmatvec(x)+scipy.dot(dangle,xlast))+(1-alpha)*xlast.sum()/n
        # check convergence, l1 norm
        err=scipy.absolute(x-xlast).sum()
        if err < n*tol:
            return x

    raise NetworkXError("page_rank_scipy: power iteration failed to converge in %d iterations."%(i+1))


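# In the update above, rmatvec(x) computes M^T x, i.e. the left multiply
# x^T M used by the pure-Python version; scipy.dot(dangle,xlast) adds
# back the rank held by dangling nodes, and the last term is the uniform
# teleportation.  rmatvec was the sparse-matrix API of scipy at the
# time; a current scipy sketch would use M.T.dot(x) instead.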
if __name__ == "__main__":

    from networkx import *
    import numpy

    G=DiGraph()

    edges=[(1,2),(1,3),
           (3,1),(3,2),(3,5),
           (4,5),(4,6),
           (5,4),(5,6),
           (6,4)]

    G.add_edges_from(edges)

    M=google_matrix(G,alpha=0.9)
    e,ev=numpy.linalg.eig(M.T)
    m=e.argsort()[-1] # eig() does not sort, so pick the eigenvalue 1 explicitly
    p=numpy.array(ev[:,m]/ev[:,m].sum()).flatten()
    print "exact   ", p

    pr=page_rank(G,alpha=0.9,tol=1.0e-8)
    print "networkx", pr.values() # dict ordering, not necessarily node order

    np=page_rank_numpy(G,alpha=0.9)
    print "numpy   ", np

    ns=page_rank_scipy(G,alpha=0.9)
    print "scipy   ", ns