1、表准备
存储所有网页的基本信息
CREATE TABLE [WebPages] (
[DOCID] [int] IDENTITY (1, 1) NOT NULL ,
[URL] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL ,
[PAGETEXT] [text] COLLATE Chinese_PRC_CI_AS NULL ,
[REFCOUNT] [int] NULL ,
[WEIGHT] [float] NULL ,
CONSTRAINT [DOCID] PRIMARY KEY CLUSTERED
(
[DOCID]
) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
存储网页链接关系
CREATE TABLE [PageLinks] (
[docid] [int] NULL ,
[url] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL ,
[anchordocid] [int] NULL ,
[anchorurl] [varchar] (900) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO
2、执行脚本,填充WebPages中的REFCOUNT,使之成为链出网页的数量
declare cur_webpages cursor
for select docid from webpages
declare @docid int
open cur_webpages
fetch next from cur_webpages into @docid
while (@@fetch_status=0)
begin
declare @rcount int
select @rcount=count(*) from pagelinks where docid=@docid
update webpages set refcount=@rcount WHERE CURRENT OF cur_webpages
fetch next from cur_webpages into @docid
end
close cur_webpages
deallocate cur_webpages
3、填充初始值
update webpages set weight=1.0/(select count(*) from webpages)
update webpages set REVWEIGHT=0
4、计算PageRank的SQL脚本
declare @count int
set @count=0
while(@count<=7)
begin
declare cur_webpages cursor
for select docid,refcount,weight,revweight from webpages
declare @docid int
declare @refcount int
declare @weight float
declare @revweight float
if(@count%2=0)
select docid,weight into pagebak from webpages
else
select docid,revweight as weight into pagebak from webpages
CREATE INDEX [docid] ON [dbo].[pagebak]([docid])
open cur_webpages
fetch next from cur_webpages into @docid,@refcount,@weight,@revweight
while (@@fetch_status=0)
begin
if @refcount<>0
begin
if(@count%2=0)
update pagebak set weight=weight+(@weight/@refcount) where docid in (select anchordocid from pagelinks where docid=@docid)
else
update pagebak set weight=weight+(@revweight/@refcount) where docid in (select anchordocid from pagelinks where docid=@docid)
end
fetch next from cur_webpages into @docid,@refcount,@weight,@revweight
end
close cur_webpages
deallocate cur_webpages
if(@count%2=0)
begin
update webpages set revweight=0.2+(select weight from pagebak where pagebak.docid=webpages.docid)
update webpages set revweight=revweight/(select sum(revweight) from webpages)
end
else
begin
update webpages set weight=0.2+(select weight from pagebak where pagebak.docid=webpages.docid)
update webpages set weight=weight/(select sum(weight) from webpages)
end
drop table pagebak
select * from webpages
set @count=@count+1
end