标题:URLDemo.MyThread cannot be resolved to a type 怎么解决
注 册:2020-4-30
 问题点数:0 回复次数:1 
URLDemo.MyThread cannot be resolved to a type 怎么解决
package com.exe1;
/**
 * 读取当当网下首页图书的数据,并进行分析
 * 爬取深度为2
 * 爬取数据存储到D:\Java文件保存库\爬虫信息文件,目录下
 * */
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;

public class URLDemo2 {
    private static final int MAX_THREAD = 0;
    private static String savepath = "D:\\Java文件保存库\\爬虫信息文件";
    private static List<String> allwaiturl = new ArrayList<>();
    private static Set<String> alloverurl = new HashSet<>();
    private static Map<String,Integer> allurldepth = new HashMap<>();
    private static int maxdepth = 2;
    private static Object obj = new Object();
    private static int count = 0;
    public static void main(String args[]) {
        //网址为    http://book.
        //String strurl = "http://search.
        String strurl = "http://book.
        for(int i=0;i<MAX_THREAD;i++){
              new URLDemo().new MyThread().start();

     * 网页数据爬取
     * @param strurl
     * @param depth
     * */
    public static void workurl(String strurl,int depth) {
        if(!(alloverurl.contains(strurl)||depth>maxdepth)) {
            System.out.println("当前执行:"+Thread.currentThread().getName()+" 爬取线程处理爬取:"+strurl);
            try {
                URL url = new URL(strurl);
                URLConnection conn = url.openConnection();
                InputStream is = conn.getInputStream();
                if(conn.getContentType().startsWith("text")) {
                //因此用BufferedRead 和 InputStreamReader 把字节流转换为字符流的缓冲流
                BufferedReader br = new BufferedReader(new InputStreamReader(is, "GB2312"));
                String line = null;
                Pattern p = ("<a.*href=.+</a>");
                PrintWriter pw=new PrintWriter(new File(savepath+System.currentTimeMillis()+".txt"));
                while((line = br.readLine())!=null) {
                    Matcher m = p.matcher(line);
                    while(m.find()) {
                        String href = m.group();
                        href = href.substring(href.indexOf("href="));
                            href = href.substring(6);
                        }else {
                            href = href.substring(5);
                        try {
                            href = href.substring(0, href.indexOf("\""));
                        }catch(Exception e) {
                            try {
                                href = href.substring(0, href.indexOf(" "));
                            }catch(Exception e1) {
                                href = href.substring(0, href.indexOf(">"));
                         * 输出该网页存在的链接
                         * System.out.println(href);
                         * 将url地址放到队列中
                         * */
            }catch(Exception e) {
                //TODO Auto-generated catch block
         * 用递归的方法继续爬取其他链接
         * String nexturl = allwaiturl.get(0);
         * allwaiturl.remove(0);
         * workurl(nexturl,allurldepth.get(nexturl));
         * */
        if(allwaiturl.size()>0) {
            synchronized(obj) {
        }else {
         * 将获取的url放入等待队列中,同时判断是否已经放过
         * @param href
         * @param depth
         * */
        public static synchronized void addurl(String href, int depth) {
            if(!allurldepth.containsKey(href)) {
                allurldepth.put(href, depth+1);
         * 移除爬取完成的url,获取下一个未爬取的url
         * @return
         * */
        public static synchronized String geturl() {
            String nexturl = allwaiturl.get(0);
            return nexturl;
         * 线程分配任务
         * */
        public class MyThread extends Thread{
            public void run() {
                while(true) {
                    if(allwaiturl.size()>0) {
                        String url = geturl();
                    }else {
                        synchronized(obj) {
                            try {
                            }catch(Exception e) {

2020-10-25 21:55
Rank: 2
等 级:论坛游民
威 望:1
帖 子:19
注 册:2021-7-2
2021-07-05 20:15
