注册 登录
编程论坛 J2EE论坛

请教一下读取网页内容的问题

specilize 发布于 2011-10-21 23:23, 571 次点击
我在做一个音乐播放器,想通过程序输入歌曲的名字,然后使用百度的MP3搜索功能返回歌曲的URL和歌词的URL。我去看了下开源的YOYOPlayer,里面读取网页得到歌词的代码着实看不懂(HttpClient的用法我懂一点,但具体怎么从网页中得到URL我就不懂了)。现把相关代码贴上来,谁来帮我讲解一下,或是给个参考例子给我看下,万分感谢。
 private static String getBaidu_Lyric(String key) throws Exception {
        HttpClient http = new HttpClient();
        http.getParams().setContentCharset("GBK");
        GetMethod get = new GetMethod("http://www.baidu.com/s?wd=" + URLEncoder.encode("filetype:lrc " + key, "GBK"));
        get.addRequestHeader("Host", "www.baidu.com");
        get.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11");
        get.addRequestHeader("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5");
        get.addRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
        get.addRequestHeader("Keep-Alive", "300");
        get.addRequestHeader("Referer", "http://www.baidu.com/");
        get.addRequestHeader("Connection", "keep-alive");
        int i = http.executeMethod(get);
        String temp = getString(get.getResponseBodyAsStream());
        get.releaseConnection();
//        System.out.println("TEMP="+temp);
//        Matcher m = ("(?<=<b>【LRC】</b>).*?(?=文件格式)").matcher(temp);
//        Matcher m = ("(?<='\\)\" href=\").*?(?=\" target=\"_blank\"><font size=\"3\">)").matcher(temp);
        Matcher m = ("(?<=LRC/Lyric - <a href=\").*?(?=\" target=\"_blank\">HTML版</a>)").matcher(temp);
        String content = null;
        if (m.find()) {
            String str = m.group();
            content = Util.getURLContent(str);
            m = ("(?<=<body>).*?(?=</body>)").matcher(content);
            if (m.find()) {
                content = m.group();
            }
        }
        System.out.println(content);
        return content;
    }

   
private static String getString(InputStream is) {
        InputStreamReader r = null;
        try {
            StringBuilder sb = new StringBuilder();
            //TODO 这里是固定把网页内容的编码写在GBK,应该是可设置的
            r = new InputStreamReader(is, "GBK");
            char[] buffer = new char[128];
            int length = -1;
            while ((length = r.read(buffer)) != -1) {
                sb.append(new String(buffer, 0, length));
            }
            return sb.toString();
        } catch (Exception ex) {
            System.out.println("发生在getString");
            return "";
        } finally {
            try {
                r.close();
            } catch (Exception ex) {
                System.out.println("发生在finally中");
            }
        }
    }
0 回复
1