学习啦,谢谢!
[此贴子已经被作者于2025-12-9 11:46编辑过]
程序代码:
* Entry script: crawl a serialized novel, starting from its first chapter
* page, and store one row per chapter in a DBF table.
* http and html are created here at program level and are used directly
* (without being passed as parameters) by the functions further down.
cUrl = "http://www./6780946/3664234966.html"
http = CREATEOBJECT("WinHttp.WinHttpRequest.5.1")
* MSHTML document object used purely as an HTML parser.
* NOTE(review): designmode/write(".") presumably make sure a <body> element
* exists and that page scripts never run - confirm.
html = CREATEOBJECT("htmlfile")
html.designmode = "on"
html.write(".")
outFile = "C:\_temp\仙道九绝.dbf"
* CREATE TABLE raises a path error when the target folder is missing,
* so make sure it exists first.
IF !DIRECTORY(JUSTPATH(outFile))
    MKDIR (JUSTPATH(outFile))
ENDIF
CREATE TABLE (outFile) (标题 V(50), 网址 V(50), 内容 M)
getHtmlText(cUrl)
* Show the collected chapters; columns are named explicitly instead of "*".
SELECT 标题, 网址, 内容 FROM 仙道九绝
CLOSE TABLES ALL
CLEAR ALL
RETURN
FUNCTION getHtmlText(cUrl)
* Walks the novel page by page starting at cUrl. Every downloaded page is
* handed to getChapter(), which stores finished chapters and returns the URL
* of the next page. The walk stops when the next URL equals the book folder
* URL (purl) or when a download fails.
*   cUrl - URL of the first page, expected as http://<host>/<book>/<page>.html
* No meaningful return value.
* host, chapters, chapterUrl, title, wait, n and m are PRIVATE on purpose:
* getChapter() reads and updates them.
PRIVATE cHtml, host, purl, chapters, chapterUrl, title, pages, n, m, wait
cHtml = ""
host = "http://" + STREXTRACT(cUrl, "http://", "/")
* Book folder URL = everything up to and including the last "/" of the start
* URL, so the book id is taken from cUrl instead of being hard-coded here.
purl = LEFT(cUrl, RAT("/", cUrl))
* Number parsed from the page at purl; used below as the progress-bar total.
pages = getPages(purl)
IF pages == 0
    RETURN
ENDIF
chapters = ""
chapterUrl = cUrl
title = ""
wait = CREATEOBJECT("wait_form")
wait.show
n = 1 && chapter counter
m = wait.statbox.width / pages && progress-bar width added per chapter
DO WHILE !(cUrl==purl) AND getHtmlTextByUrl(cUrl,@cHtml)
    cUrl = getChapter(cHtml, cUrl)
#if 0 && debugging only: stop after 10 chapters
    IF n>10
        exit
    ENDIF
#endif
ENDDO
wait.release
ENDFUNC
FUNCTION getChapter(cHtml, cUrl)
* Parses one downloaded page, appends its text to the chapter currently being
* assembled and, when the "next" link no longer belongs to the current
* chapter, writes the finished chapter to the table.
*   cHtml - HTML text of the page
*   cUrl  - URL the page was fetched from
* Returns host + href of the "next" link.
* Uses variables owned by the callers: html (parser document), host, title,
* chapters, chapterUrl, n, m and wait (progress form).
* All scratch variables are LOCAL so they cannot clobber same-named
* variables higher up the call chain.
LOCAL chapter, content, lines, code1, i, page
LOCAL ARRAY arrays[1], aDatas[1]
html.getElementsByTagName("body").item[0].innerHTML = cHtml
title = STREXTRACT(html.getElementsByTagName("title").item[0].innerText, "_", "_")
chapter = html.getElementById("chapter").innerText
* Remove the first "[...]" segment; flag 4 makes STREXTRACT return it with
* its brackets, and STRTRAN then deletes that exact text.
chapter = STRTRAN(chapter, STREXTRACT(chapter, "[","]",1,4), "")
* Text of element "ad" with the _ii_rr('  ...  '); wrapper trimmed off, then
* Base64-decoded (STRCONV type 14).
content = STRCONV(ALLTRIM(html.getElementById("ad").innerText, "_ii_rr('", "');"), 14)
* Decoded text is split on commas into numbers: arrays[1] is the base value,
* arrays[2..] give the paragraph positions in reading order.
ALINES(arrays, content, 5, ",")
* Paragraphs of the page text are separated by CRLF CRLF.
lines = ALINES(aDatas, chapter, 0h0D0A0D0A)
code1 = VAL(arrays[1])
chapter = ""
FOR i=2 TO lines+1 && rebuild one page in reading order
    chapter = chapter + aDatas[VAL(arrays[i]) - code1 + 1] + 0h0D0A0D0A
ENDFOR
chapters = chapters + RTRIM(chapter,0h0D0A) && all pages of the current chapter
page = html.getElementsByClassName("m-tpage").item[0].getElementsByTagName("a").item[2].getAttribute("href")
* The part of the next href before "_" identifies its chapter; if that is not
* contained in the current URL, the current chapter is complete.
IF !(LEFT(page,AT("_",page)-1) $ cUrl)
    * Explicit column list: does not depend on the physical field order.
    INSERT INTO 仙道九绝 (标题, 网址, 内容) VALUES (title, chapterUrl, chapters)
    chapters = ""
    chapterUrl = host + page && next chapter
    wait.msg.caption = title+0h0D0D+chapterUrl
    wait.statbar.width = m * n
    n = n+1
ENDIF
RETURN host + page && next page
ENDFUNC
FUNCTION getHtmlBodyByUrl(cUrl, pHtml)
* Convenience wrapper: download cUrl and hand back the response body
* (getHtmlByUrl mode 0) through pHtml, which must be passed by reference.
* Returns whatever getHtmlByUrl() returns.
LOCAL lFetched
lFetched = getHtmlByUrl(cUrl, @pHtml, 0)
RETURN lFetched
ENDFUNC
FUNCTION getHtmlTextByUrl(cUrl, pHtml)
* Convenience wrapper: download cUrl and hand back the response text
* (getHtmlByUrl mode 1) through pHtml, which must be passed by reference.
* Returns whatever getHtmlByUrl() returns.
LOCAL lFetched
lFetched = getHtmlByUrl(cUrl, @pHtml, 1)
RETURN lFetched
ENDFUNC
FUNCTION getHtmlByUrl(cUrl, pHtml, nFlags)
* Downloads cUrl with the program-level WinHttp object (http).
*   cUrl   - address requested with GET
*   pHtml  - output, pass by reference (@); receives the response,
*            or "" when the request fails
*   nFlags - 0 returns http.ResponseBody, any other value http.ResponseText
* Returns .T. only when the request completed with HTTP status 200.
* On any failure (COM error or non-200 status) a message box is shown and
* .F. is returned.
LOCAL err
err = .f.
pHtml = ""
TRY
    http.open("GET", cUrl, 0)
    http.send()
    IF http.status == 200
        IF nFlags == 0
            pHtml = http.ResponseBody
        ELSE
            pHtml = http.ResponseText
        ENDIF
    ELSE
        * A non-200 answer carries no usable page. Report it as a failure
        * rather than letting callers parse an empty document.
        err = .t.
    ENDIF
CATCH
    err = .t.
ENDTRY
IF err
    MESSAGEBOX("获取网页失败"+0h0D+cUrl)
ENDIF
RETURN !err
ENDFUNC
FUNCTION getPages(purl)
* Downloads the page at purl and reads a number out of it: the text found
* between "第" and "章" in the first <li> inside element "play_0".
*   purl - URL of the page to inspect
* Returns that number (via VAL), or 0 when the download fails.
LOCAL pHtml, cFirstItem
IF !getHtmlTextByUrl(purl, @pHtml)
    RETURN 0
ENDIF
* Load the markup into the shared parser document before querying it
html.getElementsByTagName("body").item[0].innerHTML = pHtml
cFirstItem = html.getElementById("play_0").getElementsByTagName("li").item[0].innerText
RETURN VAL(STREXTRACT(cFirstItem,"第","章"))
ENDFUNC
DEFINE CLASS wait_form AS Form
    * Small always-on-top progress window without a title bar:
    * a centered caption label (msg) above a two-shape progress bar, where
    * statbox is the full-width track and statbar is the fill whose Width
    * is changed by the code that drives the form.

    * Size and placement
    Width = 500
    Height = 150
    AutoCenter = .T.

    * Window behaviour
    ShowWindow = 0
    WindowType = 0
    AlwaysOnTop = .T.

    * Window frame
    TitleBar = 0
    BorderStyle = 2
    Movable = .F.

    * Status text
    ADD OBJECT msg AS Label WITH ;
        Left = 20, Top = 20, Width = 460, Height = 60, ;
        Caption = "请稍候......", Alignment = 2, FontSize = 12

    * Progress track (added first so the fill is drawn on top of it)
    ADD OBJECT statbox AS Shape WITH ;
        Left = 20, Top = 100, Width = 460, Height = 22, ;
        SpecialEffect = 1, BackColor = 0x00FFFFFF, BorderColor = 0x00C0C0C0

    * Progress fill, starts empty
    ADD OBJECT statbar AS Shape WITH ;
        Left = 20, Top = 100, Width = 0, Height = 22, ;
        SpecialEffect = 1, BackColor = 0x00C08000, BorderColor = 0x00C0C0C0

    * Double-clicking the form closes it
    PROCEDURE DblClick
        THIS.Release()
    ENDPROC
ENDDEFINE
[此贴子已经被作者于2025-12-10 11:47编辑过]