注册 登录
编程论坛 VFP论坛

请高手指点一下 如何获取这个网页的数据

nbwww 发布于 2021-08-28 18:29, 4913 次点击
https://

* Fetch a page with MSXML2.XMLHTTP and print the response body.
* NOTE(review): the URL literal on the next line appears truncated in this copy
* (no host and no closing quote) - restore the full address before running.
cUrl = "https://
oHTTP=NEWOBJECT("MSXML2.XMLHTTP")
* Third argument .F. requests a synchronous call.
* NOTE(review): the request is a POST sent without a body, the later replies in this
* thread fetch the same site with GET plus a Cookie header - confirm which is intended.
oHTTP.Open("post", cUrl, .F.)
oHTTP.SETREQUESTHEADER("Content-Type", "text/html;charset=UTF-8")
oHTTP.Send()
* STRCONV(..., 11) converts the UTF-8 response bytes to the local double-byte code page.
lcText = STRCONV(oHTTP.ResponseBody, 11)
? lcText

这样没有取到数据。
40 回复
#2
schtg2021-08-28 20:20
我水平有限,只能这样的,哈哈
只有本站会员才能查看附件,请 登录
#3
nbwww2021-08-29 07:43
怎样取得的?请指教一下
#4
nbwww2021-08-29 10:26
笨是笨了点    爬是爬下来了

* Scrape a paginated HTML table through an automated Internet Explorer instance.
* The collected rows are joined into comma-separated text in a1, which is printed,
* copied to the clipboard and written to aaa.csv.
CLEAR
PUBLIC oie
oie = CREATEOBJECT("internetexplorer.application")
* NOTE(review): the URL literal on the next line appears truncated in this copy
* (no host and no closing quote) - restore the full address before running.
lurl="https://
oie.Navigate(lurl)
* Poll every 2 seconds until IE is no longer busy and ReadyState is 4.
DO WHILE oie.Busy OR oie.ReadyState!=4
   * DOEVENTS
    INKEY(2)
    ?? " 请稍候......",TIME()+0h0D
ENDDO
WAIT WINDOW "延时等待中..." TIMEOUT 1
* bError records a failed read of the page body, it is set but not checked later in this script.
bError = .F.
TRY
    lctext = oie.document.body.innerHTML
CATCH
    bError = .T.
    MESSAGEBOX("网页打开错误",0+16,"出错了")
ENDTRY

** Example of the "last page" link text that is parsed below: ?page=17">尾页
* Step 1: isolate the markup of the "skip to last" pager item.
C1 = '<li class="PagedList-skipToLast">'
C2 = '</a></li></ul>'
页数=STREXTRACT(lcText, C1, C2,1)
?页数
* Step 2: take the number between ?page= and the link text, it becomes the page count.
C1 = '?page='
C2 = '">尾页'
页数=VAL((STREXTRACT(页数, C1, C2,1)))
 
   

***** Header row *****
* The first three header cells carry a width style, so each needs its own start delimiter.
C1 = '<th style="width: 20%;">'
C11='<th style="width: 5%;">'
C12='<th style="width: 15%;">'
C2 = "</th>"
a1=''
a1=a1+ALLTRIM(STREXTRACT(lcText, C1, C2,1))+','
a1=a1+ALLTRIM(STREXTRACT(lcText, C11, C2,1))+','
a1=a1+ALLTRIM(STREXTRACT(lcText, C12, C2,1))+','


* The remaining six header cells are plain <th> elements.
C1 = '<th>'
C2 = "</th>"
FOR ii=1 TO 6
    a1=a1+ALLTRIM(STREXTRACT(lcText, C1, C2,ii))+','
ENDFOR
* Each output line is terminated with a single carriage return.
a1=a1+CHR(13)


******************* Content *****************
* One pass per page: open lurl, read the body, append its rows, then follow the "next page" link.
FOR aii1=1 TO 页数
   * NOTE(review): a new IE instance is created on every pass and none is closed in this script.
   oie = CREATEOBJECT("internetexplorer.application")                  && Why can't the existing instance be navigated again here? Reusing it raises an error
   oie.Navigate(lurl)
   DO WHILE oie.Busy OR oie.ReadyState!=4
      * DOEVENTS
       INKEY(2)
       ?? " 请稍候......",TIME()+0h0D
   ENDDO
   WAIT WINDOW "延时等待中..." TIMEOUT 2
  TRY
      lctext = oie.document.body.innerHTML
  CATCH
     bError = .T.
    MESSAGEBOX("网页打开错误",0+16,"出错了")
  ENDTRY
   C1 = '<td>'
   C2 = "</td>"
   * The loop is hard-coded to 10 rows per page.
   FOR iii=1 TO  10
      * Row iii: the three styled cells are the iii-th occurrence of each styled <td>.
      a1=a1+ALLTRIM(STREXTRACT(lcText,'<td style="width: 20%;">', C2,iii))+','
      a1=a1+ALLTRIM(STREXTRACT(lcText,'<td style="width: 5%;">', C2,iii))+','
      * Line-feed and carriage-return characters are removed from the third cell.
      a1=a1+CHRTRAN(ALLTRIM(STREXTRACT(lcText,'<td style="width: 15%;">', C2,iii)),CHR(10)+CHR(13),'')+','
      * The six plain <td> cells of row iii start at occurrence (iii-1)*6 + 1.
      FOR ii=1 TO 6
         a1=a1+ALLTRIM(STREXTRACT(lcText, C1, C2,ii+(iii-1)*6))+','
      ENDFOR
      a1=a1+CHR(13)
   ENDFOR
   * Build the next page address from the href of the "next page" pager link.
   c1='<li class="PagedList-skipToNext"><a href="'
   c2='" rel="next">下一页'
   * NOTE(review): the literal on the next line appears truncated in this copy
   * (host, closing quote and the + operator are missing) - restore before running.
   lurl="https://(STREXTRACT(lcText, C1, C2,1))
   ?lurl
ENDFOR
* Output: screen, clipboard, and aaa.csv (third argument 0 overwrites an existing file).
?a1  
_CLIPTEXT =a1   
STRTOFILE(a1,'aaa.csv',0)


[此贴子已经被作者于2021-8-29 10:29编辑过]

#5
nbwww2021-08-29 10:29
FOR aii1=1 TO 页数
   oie = CREATEOBJECT("internetexplorer.application")                  &&这里为什么不能在原来的页面重复打开??一重复打开就出错

有没有帮我试一下   把oie = CREATEOBJECT("internetexplorer.application")  屏蔽看出错不?
告诉我一下操作系统和VFP版本号  谢谢
#6
吹水佬2021-08-29 19:33
以下是引用nbwww在2021-8-29 10:29:06的发言:

FOR aii1=1 TO 页数
   oie = CREATEOBJECT("internetexplorer.application")                  &&这里为什么不能在原来的页面重复打开??一重复打开就出错

有没有帮我试一下   把oie = CREATEOBJECT("internetexplorer.application")  屏蔽看出错不?
告诉我一下操作系统和VFP版本号  谢谢

来回跑得太快了?
#7
nbwww2021-08-29 20:17
加延时也没用   只能杀进程   一次次打开
#8
山中石2021-08-29 23:10
NloWebBrowser =  CREATEOBJECT("INTERNETEXPLORER.APPLICATION")
 NloWebBrowser.Navigate(PSTR) 这里不同页面
#9
吹水佬2021-08-30 09:54
不能打开网页了
https://
#10
nbwww2021-08-30 13:37
可以打开的   有IP限制?
#11
吹水佬2021-08-30 17:45
以下是引用nbwww在2021-8-30 13:37:08的发言:

可以打开的   有IP限制?

可能是,换IP就好了。
不用浏览器打开时,好像需要带上网页的cookie才能正常获取网页内容
只有本站会员才能查看附件,请 登录

程序代码:
* Entry script: obtain the session cookie and the page count through GetUrlCookie(),
* then download every page with UrlDownload() and load its rows into cursor tt.
cBaseUrl = "https:///project/buildtablelist/938d3183-00f9-eb11-8e8f-005056b8d0cb"
nPages = 0
cSessionCookie = GetUrlCookie(cBaseUrl, @nPages)
* Nothing to do when the page count could not be determined.
IF nPages == 0
    RETURN
ENDIF
? "共 "+TRANSFORM(nPages)+" 页"
CREATE CURSOR tt (楼栋 C(50),房号 C(4),套内面积 c(10),建筑面积 c(10),;
    所在层 C(2),规划用途 C(4),备案总价 C(12),户型 C(12),销售状态 C(8))
nPage = 1
DO WHILE nPage <= nPages
    cBody = UrlDownload(cBaseUrl+"?page="+TRANSFORM(nPage), cSessionCookie)
    IF EMPTY(cBody)
        ? "第 "+TRANSFORM(nPage)+" 页"+" 失 败"
    ELSE
        ? "第 "+TRANSFORM(nPage)+" 页"+" 成 功"
        GetTable(cBody)
    ENDIF
    * Pause between requests.
    INKEY(1)
    nPage = nPage + 1
ENDDO
SELECT * FROM tt
RETURN

FUNCTION GetUrlCookie(cUrl, nPageCount)
    * Opens cUrl in an automated Internet Explorer instance and returns the document cookie.
    * Parameters:
    *   cUrl       - address of the first listing page.
    *   nPageCount - pass by reference (@); receives the number found in the last
    *                "page=" link before the "last page" link text. Left unchanged
    *                when the document cannot be read.
    * Returns: the cookie string, or "" when the document is not available.
    * The IE instance is always closed with Quit, otherwise the hidden browser
    * process keeps running after this function returns.
    LOCAL ie, cTxt, cCookie, nPolls
    cCookie = ""
    ie = CREATEOBJECT("internetexplorer.application")
    TRY
        ie.Navigate(cUrl)
        * Poll about once per second, give up after 60 polls instead of waiting forever.
        nPolls = 0
        DO WHILE (ie.Busy OR ie.ReadyState!=4) AND nPolls < 60
            INKEY(1)
            nPolls = nPolls + 1
            ?? " 请稍候......",TIME()+0h0D
        ENDDO
        IF VARTYPE(ie.document)=="O"
            cTxt = ie.document.body.innerHTML
            nPageCount = VAL(STREXTRACT(cTxt, [page=], [">尾页], OCCURS("page=",cTxt)))
            * The cookie must be read before the browser is closed.
            cCookie = ie.document.cookie
        ENDIF
    FINALLY
        ie.Quit()
    ENDTRY
    RETURN cCookie
ENDFUNC

FUNCTION UrlDownload(cUrl, Cookie)
    * Downloads cUrl with WinHttp, sending Cookie as the Cookie request header.
    * Returns: the text between <tbody> and </tbody> when the status is 200,
    *          otherwise "". A transport or COM error is printed and also yields "",
    *          so the caller's EMPTY() check covers every failure.
    LOCAL wh, cBody, oErr
    cBody = ""
    TRY
        wh = CREATEOBJECT("WinHttp.WinHttpRequest.5.1")
        wh.Open("GET", cUrl, 0)
        wh.SetRequestHeader("Cookie", Cookie)
        wh.Send()
        IF wh.Status == 200
            cBody = STREXTRACT(wh.ResponseText,[<tbody>],[</tbody>])
        ENDIF
    CATCH TO oErr
        * Show the reason instead of aborting the whole run.
        ? oErr.Message
        cBody = ""
    ENDTRY
    RETURN cBody
ENDFUNC

FUNCTION GetTable(cTxt)
    * Appends one record to cursor tt for every <tr>...</tr> row found in cTxt.
    * Within a row, the text segments between ">" and "<" with odd occurrence
    * numbers 1, 3, ... 17 are taken as the nine field values, each stripped of
    * surrounding spaces, carriage returns and line feeds.
    LOCAL nRow, nRowCount, nCol, cRow
    LOCAL ARRAY aCell[9]
    nRowCount = OCCURS("<tr>",cTxt)
    FOR nRow = 1 TO nRowCount
        cRow = STREXTRACT(cTxt, [<tr>], [</tr>], nRow)
        FOR nCol = 1 TO 9
            aCell[nCol] = ALLTRIM(STREXTRACT(cRow, [>], [<], 2*nCol-1), 0h20,0h0D,0h0A)
        ENDFOR
        INSERT INTO tt VALUES (aCell[1], aCell[2], aCell[3], aCell[4], aCell[5],;
            aCell[6], aCell[7], aCell[8], aCell[9])
    ENDFOR
ENDFUNC


#12
nbwww2021-08-30 18:26
谢谢   让我先学习消化下
#13
sdta2021-08-30 21:07
程序代码:
* Entry script: download every listing page with GETURL() and load the rows into cursor lpmx.
CREATE CURSOR lpmx (楼栋 C(50), 房号 C(4), 套内面积 c(10), 建筑面积 c(10), 所在层 C(2), 规划用途 C(4), 备案总价 C(12), 户型 C(12), 销售状态 C(8))
lcUrl = "https:///project/buildtablelist/938d3183-00f9-eb11-8e8f-005056b8d0cb"
lcText = GETURL(lcUrl, 1)

* Page count = the number that follows the last "?page=" in front of the "last page"
* link text. This replaces a hard-coded occurrence index (9), which only works while
* the pager shows a fixed number of links.
lnPage = 0
lnTail = AT('">尾页', lcText)
IF lnTail > 0
    lcHead = LEFT(lcText, lnTail - 1)
    lnStart = RAT('?page=', lcHead)
    IF lnStart > 0
        lnPage = INT(VAL(SUBSTR(lcHead, lnStart + LEN('?page='))))
    ENDIF
ENDIF

FOR ln1 = 1 TO lnPage
    lcText = GETURL(lcUrl + "?page=" + TRANSFORM(ln1), 2)
    * Use the count returned by ALINES so a failed (empty) download yields no rows
    * rather than relying on whatever the array held before.
    lnRows = ALINES(laText, lcText, 4 + 1, '<tr>', '</tr>')
    * Even-numbered pieces are the row bodies, odd-numbered pieces are the gaps between rows.
    FOR ln2 = 2 TO lnRows STEP 2
        * A row needs at least 11 lines for the indexes used below, skip anything shorter.
        IF ALINES(laTxt, laText[ln2], 4 + 1, CHR(13), CHR(10)) < 11
            LOOP
        ENDIF
        INSERT INTO lpmx (楼栋, 房号, 套内面积, 建筑面积, 所在层, 规划用途, 备案总价, 户型, 销售状态) ;
            VALUES (STREXTRACT(laTxt[1], '>', '<'),;
                    STREXTRACT(laTxt[2], '>', '<'),;
                    STREXTRACT(laTxt[3], '>', '<'),;
                    STREXTRACT(laTxt[4], '>', '<'),;
                    STREXTRACT(laTxt[5], '>', '<'),;
                    STREXTRACT(laTxt[6], '>', '<'),;
                    laTxt[8],;
                    STREXTRACT(laTxt[10], '>', '<'),;
                    STREXTRACT(laTxt[11], '>', '<'))
    ENDFOR
ENDFOR
BROWSE
RETURN

FUNCTION GETURL(UrlName, nId, cCookie)
    * Downloads UrlName with MSXML2.ServerXMLHTTP (synchronous GET) and returns text.
    * Parameters:
    *   UrlName - address to fetch.
    *   nId     - 2 returns only the text between <tbody> and </tbody>,
    *             any other value returns the whole response text.
    *   cCookie - optional Cookie header value. When omitted or empty the cookie
    *             captured below is sent. That value was copied from a browser
    *             session and the site expires it, so pass a fresh one when available.
    * Returns: the requested text, or "" when the status is not 200 or the request fails.
    * All working variables are LOCAL so they cannot overwrite variables of the caller.
    LOCAL lcStr, oHTTP, lcCookie, loErr
    lcStr = ""
    TEXT TO lcCookie NOSHOW TEXTMERGE
Hm_lvt_b33309ddse6aedc0b7a6a5e11379efd=329a1f7015f695236266a8366ef8bcc2638265d047ce81b256a824b2f3039d4086dcfd62e2832ab60e0c08a62ea9b49270c99907fc27a61da3a72f86aa801d244dbd54ab670762a4e2710372639fc3a73457755d8208e4bc0021eeb56e6bdda182770a58f2d25d5927c239f145ac3dc9373a3d21195366ff345d92173b7dadb96373f7036027b05c394f83864e091e7bf9cda9d003adff962b57097e3e55f21bab556dfb8f26718e801b1579d1a602c7; Hm_lvt_02ba6cbab8b600b7cbf07a9fea1df5a9=1630158176,1630173575,1630323282; Hm_lpvt_02ba6cbab8b600b7cbf07a9fea1df5a9=1630323282; Hm_lvt_615328aa2076e9a79247e4a9a10139b9=1630169491,1630173779,1630323284,1630323327; Hm_lpvt_615328aa2076e9a79247e4a9a10139b9=1630323362
    ENDTEXT
    IF VARTYPE(cCookie) == "C" AND !EMPTY(cCookie)
        lcCookie = cCookie
    ENDIF
    TRY
        oHTTP = CREATEOBJECT("MSXML2.ServerXMLHTTP")
        oHTTP.Open("GET", UrlName, .F.)
        oHTTP.SetRequestHeader("Cookie", lcCookie)
        oHTTP.Send()
        IF oHTTP.Status = 200
            IF nId = 2
                lcStr = STREXTRACT(oHTTP.ResponseText, '<tbody>', '</tbody>')
            ELSE
                lcStr = oHTTP.ResponseText
            ENDIF
        ENDIF
    CATCH TO loErr
        * Show the reason and report the page as empty instead of aborting the run.
        ? loErr.Message
        lcStr = ""
    ENDTRY
    oHTTP = .NULL.
    RETURN lcStr
ENDFUNC
#14
吹水佬2021-08-30 21:13
回复 13楼 sdta
cookie好像有时限,过期后就无效
#15
sdta2021-08-30 21:32
以下是引用吹水佬在2021-8-30 21:13:59的发言:

cookie好像有时限,过期后就无效

是的,数据下载不完全
#16
nbwww2021-08-31 19:04
  学习中
#17
吹水佬2021-09-03 10:54
以下是引用sdta在2021-8-30 21:32:23的发言:


是的,数据下载不完全

从自动跳转的页面代码看,COOKIE的创建过程:
程序代码:
    <script src="/src/js/jquery.cookie.min.js?v=1"></script>
    <script src="/src/js/crypto-js-4.0.0/crypto-js.js?v=1"></script>

    <script>
        var a1 = "784A9BB8C2B542F6B7133E45B4C9BC50";
        var b1 = "61B2D25DB69F2B3A06485A2293FCFCA79D29AF3E829B22CA0D5E7DC2A0AE586ABD0975AB7B31182194F61E73F576C44C";
        var c1 = '10B242A121CBFD7C6F09AA9F5150C081046DC982B5364AB4D73F1776A362A75A67DB9697A51FADF68E62CF5384F8FC51DA80F71FEFC90F3D3922B813383DD1259681A5EFAEDCEF578A92440E73A5582B';
        var d1 = 'Hm_lvt_b33309ddse6aedc0b7a6a5e11379efd'
        var h1 = '.'
        var i1 = 'http://'
        var k1 = 0
    </script>
    <script src="/src/js/jquery.sprawl.js?v=1"></script>

// Runs when the document is ready: builds a cookie value with AES and stores it,
// then optionally replaces the current location.
// a1, b1, c1, d1, h1, i1 and k1 are globals set by the inline <script> of the page.
$(function () {
    var h = CryptoJS;
    // Parse the three page-supplied strings as UTF-8 text.
    var a = h.enc.Utf8.parse(a1);
    var b = h.enc.Utf8.parse(b1);
    var c = h.enc.Utf8.parse(c1);
    // Encrypt c with key a and IV b, AES in CBC mode with PKCS7 padding.
    var d = h.AES.encrypt(c, a, { iv: b, mode: CryptoJS.mode.CBC, padding: CryptoJS.pad.Pkcs7 });
    // The cookie value is the string form of the ciphertext.
    // NOTE(review): presumably hex, which is the CryptoJS default - confirm.
    var f = d.ciphertext.toString();
    // Expiry is set 30 days from now (value in milliseconds).
    var date = new Date();
    date.setTime(date.getTime() + (30 * 24 * 60 * 60 * 1000));
    // Store the cookie under the name d1 for domain h1.
    $.cookie(d1, f, { domain: h1, expires: date });
    // When k1 is falsy, navigate to i1 without adding a history entry.
    if (!k1) {
        location.replace(i1)
    }
});

创建语句:
$.cookie(d1, f, { domain: h1, expires: date });
是创建一个带域名和时限的cookie

自动跳转到:http://
location.replace(i1)


[此贴子已经被作者于2021-9-3 10:55编辑过]

#18
sdta2021-09-03 19:38
回复 17楼 吹水佬
VFP代码如何写
#19
吹水佬2021-09-03 21:38
以下是引用sdta在2021-9-3 19:38:51的发言:

VFP代码如何写

先截获到跳转页面,就可以看到那些JS代码
相关几个JS文件:
http://
http://
http://
如果能整理出相关的JS代码,VFP可以用ScriptControl控件执行JS代码。
有兴趣的话最好将相关的JS代码转换为VFP代码,就看有无这必要。
#20
schtg2021-09-04 05:41
这也太高大上啦,哈哈^_^
#21
nbwww2021-09-04 08:14
#22
nbwww2021-09-04 08:46
回复 11楼 吹水佬
ALLTRIM(STREXTRACT(tr, [>], [<], 1), 0h20,0h0D,0h0A),;
ALLTRIM(STREXTRACT(tr, [>], [<], 3), 0h20,0h0D,0h0A),;

请教一下吹版  这后面的   0h20,0h0D,0h0A  是什么作用?
指导一下  谢谢
#23
吹水佬2021-09-04 16:34
以下是引用nbwww在2021-9-4 08:46:51的发言:

ALLTRIM(STREXTRACT(tr, [>], [<], 1), 0h20,0h0D,0h0A),;
ALLTRIM(STREXTRACT(tr, [>], [<], 3), 0h20,0h0D,0h0A),;

请教一下吹版  这后面的   0h20,0h0D,0h0A  是什么作用?
指导一下  谢谢

3种字符:空格、回车、换行
#24
sdta2021-09-04 17:34
回复 19楼 吹水佬
多谢吹版,正如楼上所说,这有点高大上了!
#25
nbwww2021-09-04 18:09
学习了  谢谢
#26
吹水佬2021-09-05 16:51
以下是引用sdta在2021-9-4 17:34:14的发言:

多谢吹版,正如楼上所说,这有点高大上了!

“高大上”,吹下水还可以,其实编程没什么高大上,编程就是这样子,重在理解和思路,只求目的,用什么语言都无所谓。
看了一下页面相关的JS代码,涉及到字符串编码解码的问题,有点复杂,要转为VFP代码可能要花点时间。
试了一下取出相关JS代码在VFP中运行来获取COOKIE的值(忽略域名和时间)
程序代码:
* Entry script: compute the access cookie with GetUrlCookie(), download all listing
* pages with UrlDownload() and load their rows into cursor tt.
* The API declarations are released on every exit path, and the pause between
* requests is 2 seconds because a 1-second pause was reported to make the last page fail.
DECLARE LONG URLDownloadToFileA IN Urlmon LONG,STRING@,STRING@,LONG,LONG
DECLARE LONG DeleteUrlCacheEntry IN Wininet STRING@
cUrl = "https:///project/buildtablelist/938d3183-00f9-eb11-8e8f-005056b8d0cb"
nPageCount = 0
cookie = GetUrlCookie(cUrl)
IF !EMPTY(cookie)
    * The first page is fetched whole, because the pager with the page count is outside <tbody>.
    cTxt = UrlDownload(cUrl, cookie, 1)
    nPageCount = VAL(STREXTRACT(cTxt, [page=], [">尾页], OCCURS("page=",cTxt)))
ENDIF
IF nPageCount > 0
    CREATE CURSOR tt (楼栋 C(50),房号 C(4),套内面积 c(10),建筑面积 c(10),;
        所在层 C(2),规划用途 C(4),备案总价 C(12),户型 C(12),销售状态 C(8))
    ? "共 "+TRANSFORM(nPageCount)+" 页"
    ? "第 1 页 成 功"
    GetTable(STREXTRACT(cTxt,[<tbody>],[</tbody>]))
    FOR i=2 TO nPageCount
        cTxt = UrlDownload(cUrl+"?page="+TRANSFORM(i), cookie)
        ? "第 "+TRANSFORM(i)+" 页"+IIF(!EMPTY(cTxt), " 成 功"," 失 败")
        IF !EMPTY(cTxt)
            GetTable(cTxt)
        ENDIF
        * Pause between requests, shorter pauses made the server reject pages.
        INKEY(2)
    ENDFOR
    SELECT * FROM tt
ENDIF
CLEAR DLLS
RETURN

FUNCTION UrlDownload(cUrl, Cookie, nFlags)
    * Downloads cUrl with WinHttp, sending Cookie as the Cookie request header.
    * Parameters:
    *   cUrl   - address to fetch.
    *   Cookie - value of the Cookie request header.
    *   nFlags - optional. Non-empty returns the whole response text, otherwise
    *            only the text between <tbody> and </tbody> is returned.
    * Returns: the requested text when the status is 200, otherwise "". A transport
    *          or COM error is printed and also yields "", so the caller's EMPTY()
    *          check covers every failure.
    LOCAL wh, cBody, oErr
    cBody = ""
    TRY
        wh = CREATEOBJECT("WinHttp.WinHttpRequest.5.1")
        wh.Open("GET", cUrl, 0)
        wh.SetRequestHeader("Cookie", Cookie)
        wh.Send()
        IF wh.Status == 200
            IF !EMPTY(nFlags)
                cBody = wh.ResponseText
            ELSE
                cBody = STREXTRACT(wh.ResponseText,[<tbody>],[</tbody>])
            ENDIF
        ENDIF
    CATCH TO oErr
        * Show the reason instead of aborting the whole run.
        ? oErr.Message
        cBody = ""
    ENDTRY
    RETURN cBody
ENDFUNC

FUNCTION GetTable(cTxt)
    * Appends one record to cursor tt for every <tr>...</tr> row found in cTxt.
    * Within a row, the text segments between ">" and "<" with odd occurrence
    * numbers 1, 3, ... 17 are taken as the nine field values, each stripped of
    * surrounding spaces, carriage returns and line feeds.
    LOCAL nRow, nRowCount, nCol, cRow
    LOCAL ARRAY aCell[9]
    nRowCount = OCCURS("<tr>",cTxt)
    FOR nRow = 1 TO nRowCount
        cRow = STREXTRACT(cTxt, [<tr>], [</tr>], nRow)
        FOR nCol = 1 TO 9
            aCell[nCol] = ALLTRIM(STREXTRACT(cRow, [>], [<], 2*nCol-1), 0h20,0h0D,0h0A)
        ENDFOR
        INSERT INTO tt VALUES (aCell[1], aCell[2], aCell[3], aCell[4], aCell[5],;
            aCell[6], aCell[7], aCell[8], aCell[9])
    ENDFOR
ENDFUNC

FUNCTION GetUrlCookie(cUrl)
    * Computes the access cookie for cUrl without a browser.
    * The page is downloaded with UrlToStr(), the script file referenced by its 7th
    * <script src=...> tag is fetched with GetJSCode(), and that library plus the
    * page's 2nd inline <script> body are run in a ScriptControl. The AES steps
    * below then produce the cookie as "name=value".
    * Returns: the cookie string, or "" when a download fails or the script cannot run.
    * All working variables are LOCAL so they cannot overwrite variables of the caller.
    LOCAL cHtml, cLib, cVars, jsCode, sc, cCookie, oErr
    cCookie = ""
    cHtml = UrlToStr(cUrl)
    IF EMPTY(cHtml)
        RETURN ""
    ENDIF
    cLib = GetJSCode(cHtml,7)
    IF EMPTY(cLib)
        * Without the library the script below cannot run.
        RETURN ""
    ENDIF
    cLib = cLib + 0h0D0A
    cVars = STREXTRACT(cHtml,[<script>],[</script>],2)
    TEXT TO jsCode TEXTMERGE NOSHOW PRETEXT 7
        <<cLib>>
        <<cVars>>
        var h = CryptoJS;
        var a = h.enc.Utf8.parse(a1);
        var b = h.enc.Utf8.parse(b1);
        var c = h.enc.Utf8.parse(c1);
        var d = h.AES.encrypt(c, a, { iv: b, mode: CryptoJS.mode.CBC, padding: CryptoJS.pad.Pkcs7 });
        var f = d.ciphertext.toString();
        var vCookie = d1 + "=" + f;
    ENDTEXT
    * NOTE(review): this executes script text downloaded from the site - only use it with a trusted host.
    TRY
        sc = CREATEOBJECT("ScriptControl")
        sc.Language = "JavaScript"
        sc.AddCode(jsCode)
        cCookie = sc.Eval("vCookie")
    CATCH TO oErr
        * Typically the page did not contain the expected variables, report and return "".
        ? oErr.Message
        cCookie = ""
    ENDTRY
    RETURN cCookie
ENDFUNC

FUNCTION UrlToStr(cUrl)
    * Downloads cUrl to a temporary file and returns its content converted from
    * UTF-8 to the local double-byte code page.
    * Requires URLDownloadToFileA and DeleteUrlCacheEntry to be declared by the caller.
    * Returns: the page text, or "" after showing a message box when the download fails.
    * The file gets a unique name in the system temp folder so it cannot overwrite
    * an existing tmp.txt in the current directory, and it is removed on every path.
    LOCAL cFile, cText
    cText = ""
    cFile = ADDBS(SYS(2023)) + SYS(2015) + ".txt"
    * Drop any cached copy first so the download reflects the current page.
    DeleteUrlCacheEntry(cUrl)
    IF URLDownloadToFileA(0,cUrl,cFile,0,0) != 0
        MESSAGEBOX("调用UrlToStr失败")
    ELSE
        cText = STRCONV(FILETOSTR(cFile),11)
    ENDIF
    DeleteUrlCacheEntry(cUrl)
    IF FILE(cFile)
        ERASE (cFile)
    ENDIF
    RETURN cText
ENDFUNC

FUNCTION GetJSCode(cHtml, n)
    * Downloads the script file named by the n-th <script src="..."></script> tag
    * of cHtml and returns whatever UrlToStr() returns for that address.
    *   cHtml - HTML text of the page.
    *   n     - occurrence number of the script tag to use.
    * jsCode is declared but not used in this function.
    LOCAL jsCode, jsUrl
    * NOTE(review): the string literal on the next line appears truncated in this copy
    * (host name and closing quote are missing) - restore the site prefix before running.
    jsUrl = "https:// + STREXTRACT(cHtml,[<script src="],["></script>],n)
    RETURN UrlToStr(jsUrl)
ENDFUNC


#27
schtg2021-09-05 19:02
@吹版,高!谢谢!
#28
sdta2021-09-05 19:07
回复 26楼 吹水佬
共下载数据160条
#29
吹水佬2021-09-05 19:51
以下是引用sdta在2021-9-5 19:07:42的发言:

共下载数据160条

全部成功应该是164条,其中第17页4条
只有本站会员才能查看附件,请 登录

只有本站会员才能查看附件,请 登录
#30
sdta2021-09-05 19:53
我的电脑上下载第17页失败,不知是什么原因
#31
吹水佬2021-09-05 21:11
以下是引用sdta在2021-9-5 19:53:27的发言:

我的电脑上下载第17页失败,不知是什么原因

试试增加延时
INKEY(2)  && 要延时
#32
sdta2021-09-05 21:38
以下是引用吹水佬在2021-9-5 21:11:26的发言:


试试增加延时
INKEY(2)  && 要延时

多谢吹版,OK
#33
nbwww2021-09-06 16:31
谢谢 !!
#34
nbwww2021-09-06 16:33
http://www.chinatax.
能不能帮着分析下  这个页面上的数据如何获取
具体内容试了好几种办法都没有取到
#35
吹水佬2021-09-06 18:08
回复 34楼 nbwww
IE控件可以打开吧,速度可能慢点,但使用简单。
#36
吹水佬2021-09-06 23:07
以下是引用nbwww在2021-9-6 16:33:06的发言:

http://www.chinatax.
能不能帮着分析下  这个页面上的数据如何获取
具体内容试了好几种办法都没有取到

是不是要这些数据
只有本站会员才能查看附件,请 登录
#37
nbwww2021-09-07 16:38
* Open a page in an automated Internet Explorer instance, then print the body HTML
* and copy it to the clipboard.
oie = CREATEOBJECT("internetexplorer.application")
* NOTE(review): the URL literal on the next line appears truncated in this copy
* (rest of the address and closing quote are missing) - restore before running.
lurl="http://www.chinatax.
oie.Navigate(lurl)
* Poll every 2 seconds until IE is no longer busy and ReadyState is 4.
DO WHILE oie.Busy OR oie.ReadyState!=4
    INKEY(2)
    ?? " 请稍候......",TIME()+0h0D
ENDDO
* bError records a failed read of the page body, it is not checked afterwards.
bError = .F.
TRY
    lctext = oie.document.body.innerHTML
CATCH
    bError = .T.
    MESSAGEBOX("网页打开错误",0+16,"出错了")
ENDTRY

* NOTE(review): lctext is never assigned when the TRY block fails, so the lines below would then raise an error.
?lctext
 _CLIPTEXT =lctext

[此贴子已经被作者于2021-9-7 17:00编辑过]

#38
nbwww2021-09-07 17:02
以下是引用吹水佬在2021-9-6 23:07:56的发言:


是不是要这些数据

是这些数据   第一页的搞下来了   第二页没有链接   直接运行javascript吗?  
#39
吹水佬2021-09-07 17:17
回复 38楼 nbwww
获取数据是用POST请求
请求地址:http://www.chinatax.
Send()数据格式:timeOption=0&page=页号&pageSize=每页行数&keyPlace=1&sort=dateDesc&qt=*
得到的数据是一个JS对象类型:{name1:value1, name2:value2, name3:[{},{},{}...]}
还发现标题长度超过DBF字符型长度限制,改用备注类型。
只有本站会员才能查看附件,请 登录

程序代码:
* Entry script: download the policy list page by page into cursor 税收政策库 and show it.
* wh and sc are created here and used by UrlToData() and UrlToTable() under these names.
wh = CREATEOBJECT("WinHttp.WinHttpRequest.5.1")
sc = CREATEOBJECT("ScriptControl")
sc.Language = "JavaScript"
* title is a memo field, the other two columns are 160-character fields.
CREATE CURSOR 税收政策库 (title M, DOCNOVAL C(160), url C(160))
cApiUrl = "http://www.chinatax./api/query?siteCode=bm29000fgk&tab=all&key=9A9C42392D397C5CA6C1BF07E2E0AA6F"
? "每页100行"+0h0D0A
=UrlToTable(cApiUrl)
SELECT * FROM 税收政策库
* Release objects, variables and open cursors.
CLEAR ALL
RETURN

FUNCTION UrlToTable(cUrl)
    * Fetches result pages from cUrl with UrlToData() until a page has no rows and
    * inserts title, document number and url of every row into cursor 税收政策库.
    * Uses the script engine sc created by the calling program.
    *
    * The response is a JavaScript object literal. The earlier approach lowercased
    * the whole response so that member names resolved from VFP, which also
    * lowercased every title and URL value. Here only the key names are lowercased,
    * inside the script engine, so the values keep their original case.
    * NOTE(review): the response text is executed as script code - only use this
    * with a trusted endpoint.
    LOCAL nPage, cData, jsCode, cHelper, oList, nRows
    * Helper that returns a copy of a value with all object keys lowercased.
    * It is written without the ampersand character, which VFP treats specially in strings.
    cHelper = "function lowerKeys(o){var k,r,i;" + ;
        "if(o===null){return o;}" + ;
        "if(typeof o!='object'){return o;}" + ;
        "if(o instanceof Array){r=[];for(i=0;i<o.length;i++){r[i]=lowerKeys(o[i]);}return r;}" + ;
        "r={};for(k in o){r[k.toLowerCase()]=lowerKeys(o[k]);}return r;}"
    sc.AddCode(cHelper)
    nPage = 1
    DO WHILE .T.
        cData = UrlToData(cUrl, nPage)
        TEXT TO jsCode TEXTMERGE NOSHOW PRETEXT 15
            var data=lowerKeys(<<cData>>)
        ENDTEXT
        * nRows stays negative when the response cannot be parsed or has no result list.
        nRows = -1
        TRY
            sc.AddCode(jsCode)
            nRows = sc.CodeObject.data.resultList.length
        CATCH
            nRows = -1
        ENDTRY
        IF nRows < 0
            ? "获取数据失败"
            EXIT
        ENDIF
        IF nRows == 0  && no more data
            EXIT
        ENDIF
        FOR EACH oList IN sc.CodeObject.data.resultList
            INSERT INTO 税收政策库 VALUES (oList.title, oList.customHs.DOCNOVAL, oList.url)
        ENDFOR
        ?? "第 "+TRANSFORM(nPage)+" 页" + 0h0D
        nPage = nPage + 1
    ENDDO
ENDFUNC

FUNCTION UrlToData(cUrl, nPage)
    * Posts a form-encoded query for result page nPage (100 rows per page) to cUrl
    * through the WinHttp object wh created by the calling program.
    * Returns: the response text.
    LOCAL cForm, cReply
    cForm = "timeOption=0&page="+TRANSFORM(nPage)+"&pageSize=100&keyPlace=1&sort=dateDesc&qt=*"
    WITH wh
        .Open("POST", cUrl, 0)
        .SetRequestHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
        .Send(cForm)
        cReply = .ResponseText
    ENDWITH
    RETURN cReply
ENDFUNC


[此贴子已经被作者于2021-9-7 17:20编辑过]

#40
nbwww2021-09-07 17:21
回复 39楼 吹水佬
谢谢     我消化下
#41
schtg2021-09-07 19:31
吹版,谢谢!
1