package net.csdn.csdnplus.activity;

import android.annotation.TargetApi;
import android.net.http.SslError;
import android.os.Build;
import android.os.Bundle;
import android.view.KeyEvent;
import android.webkit.SslErrorHandler;
import android.webkit.WebChromeClient;
import android.webkit.WebResourceRequest;
import android.webkit.WebResourceResponse;
import android.webkit.WebSettings;
import android.webkit.WebView;
import butterknife.BindView;
import butterknife.ButterKnife;
import com.networkbench.agent.impl.background.NBSApplicationStateMonitor;
import com.networkbench.agent.impl.instrumentation.NBSActionInstrumentation;
import com.networkbench.agent.impl.instrumentation.NBSAppInstrumentation;
import com.networkbench.agent.impl.instrumentation.NBSInstrumented;
import com.networkbench.agent.impl.instrumentation.NBSTraceEngine;
import com.networkbench.agent.impl.instrumentation.NBSTraceUnit;
import com.networkbench.agent.impl.instrumentation.NBSWebChromeClient;
import com.networkbench.agent.impl.instrumentation.NBSWebLoadInstrument;
import com.networkbench.agent.impl.instrumentation.NBSWebViewClient;
import defpackage.cuv;
import java.util.Map;
import net.csdn.csdnplus.R;
import net.csdn.csdnplus.dataviews.csdn.view.CSDNWebView;

@NBSInstrumented
/* loaded from: classes2.dex */
public class TestWebActivity extends BaseActivity {
    private static final String b = "TestWebActivity";
    public NBSTraceUnit a;

    @BindView(R.id.wv_content)
    CSDNWebView webView;

    private void a(WebView webView) {
        if (Build.VERSION.SDK_INT >= 19) {
            WebView.setWebContentsDebuggingEnabled(true);
        }
        WebSettings settings = webView.getSettings();
        settings.setJavaScriptEnabled(true);
        settings.setDatabasePath(getApplicationContext().getDir("cache", 0).getPath());
        settings.setDomStorageEnabled(true);
        settings.setCacheMode(-1);
        settings.setSupportZoom(true);
        settings.setBuiltInZoomControls(false);
        settings.setDisplayZoomControls(false);
        settings.setLayoutAlgorithm(WebSettings.LayoutAlgorithm.SINGLE_COLUMN);
        settings.setLoadsImagesAutomatically(true);
        settings.setUseWideViewPort(true);
        settings.setDatabaseEnabled(true);
        settings.setLoadWithOverviewMode(true);
        settings.setAppCacheEnabled(true);
        if (Build.VERSION.SDK_INT >= 21) {
            settings.setMixedContentMode(0);
        }
        cuv.a(settings);
        webView.requestFocus();
        NBSWebViewClient nBSWebViewClient = new NBSWebViewClient() { // from class: net.csdn.csdnplus.activity.TestWebActivity.1
            @Override // com.networkbench.agent.impl.instrumentation.NBSWebViewClient, android.webkit.WebViewClient
            public void onPageFinished(WebView webView2, String str) {
                super.onPageFinished(webView2, str);
            }

            @Override // com.networkbench.agent.impl.instrumentation.NBSWebViewClient, android.webkit.WebViewClient
            @TargetApi(23)
            public void onReceivedHttpError(WebView webView2, WebResourceRequest webResourceRequest, WebResourceResponse webResourceResponse) {
            }

            @Override // com.networkbench.agent.impl.instrumentation.NBSWebViewClient, android.webkit.WebViewClient
            public void onReceivedSslError(WebView webView2, SslErrorHandler sslErrorHandler, SslError sslError) {
                sslErrorHandler.proceed();
            }

            @Override // android.webkit.WebViewClient
            public boolean shouldOverrideUrlLoading(WebView webView2, String str) {
                return cuv.a(TestWebActivity.this, str, (WebView) null, (Map<String, String>) null);
            }
        };
        if (webView instanceof WebView) {
            NBSWebLoadInstrument.setWebViewClient(webView, nBSWebViewClient);
        } else {
            webView.setWebViewClient(nBSWebViewClient);
        }
        webView.setWebChromeClient(new WebChromeClient() { // from class: net.csdn.csdnplus.activity.TestWebActivity.2
            @Override // android.webkit.WebChromeClient
            public void onProgressChanged(WebView webView2, int i) {
                NBSWebChromeClient.initJSMonitor(webView2, i);
            }

            @Override // android.webkit.WebChromeClient
            public void onReceivedTitle(WebView webView2, String str) {
                if (Build.VERSION.SDK_INT >= 23 || str.contains("404")) {
                    return;
                }
                str.contains("Error");
            }
        });
    }

    @Override // net.csdn.csdnplus.activity.BaseActivity
    protected int n_() {
        return R.layout.activity_test_web;
    }

    @Override // net.csdn.csdnplus.activity.BaseActivity, android.support.v7.app.AppCompatActivity, android.support.v4.app.FragmentActivity, android.support.v4.app.SupportActivity, android.app.Activity
    public void onCreate(Bundle bundle) {
        NBSTraceEngine.startTracing(getClass().getName());
        super.onCreate(bundle);
        ButterKnife.a(this);
        this.webView.loadDataWithBaseURL(null, "<!DOCTYPE html>\n<html>\n<head>\n    <meta charset=\"utf-8\">\n    <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n    <meta http-equiv=\"X-UA-Compatible\" content=\"IE=Edge\">\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no\">\n    <meta name=\"apple-mobile-web-app-status-bar-style\" content=\"black\">\n    <meta content=\"yes\" name=\"apple-mobile-web-app-capable\">\n    <meta content=\"black\" name=\"apple-mobile-web-app-status-bar-style\">\n    <meta content=\"telephone=no\" name=\"format-detection\">\n    <meta content=\"email=no\" name=\"format-detection\">\n    <meta name=\"referrer\" content=\"always\">\n    <title>Python网络爬虫使用BeautifulSoup爬取网页内容并存入数据库案例 - CSDN博客</title>\n    <link href=\"https://csdnimg.cn/public/favicon.ico\" rel=\"SHORTCUT ICON\">\n    <link rel=\"stylesheet\" href=\"https://csdnimg.cn/release/phoenix/production/app_detail_article-df1a13af7c.css\">\n    <style>\n      .MathJax_Preview{\n        display: none !important;\n      }\n    </style>\n    <script>\n      //h5定义rem\n      initpage();\n      window.onresize = initpage;\n      function initpage(){\n        var view_width = document.getElementsByTagName('html')[0].getBoundingClientRect().width;\n        var _html = document.getElementsByTagName('html')[0];\n        window.fontSize = view_width>750?750/7.5:view_width/7.5;\n        _html.style.fontSize= window.fontSize+'px';\n      }\n    </script>\n  </head>\n<body class=\"\">\n<!-- flowchart 箭头图标 勿删 -->\n<svg xmlns=\"http://www.w3.org/2000/svg\" style=\"display: none;\"><path stroke-linecap=\"round\" d=\"M5,0 0,2.5 5,5z\" id=\"raphael-marker-block\" style=\"-webkit-tap-highlight-color: rgba(0, 0, 0, 0);\"></path></svg>\n<div class=\"article_content \">\n            <link href=\"https://csdnimg.cn/release/phoenix/mdeditor/markdown_views-b6c3c6d139.css\" rel=\"stylesheet\">\n      <link rel=\"stylesheet\" href=\"https://csdnimg.cn/release/phoenix/production/wapedit_views_md-f62139723e.css\" />\n  <div id=\"content_views\" class=\"markdown_views clearfix  prism-atom-one-dark\">\n          <h1><a id=\"BeautifulSoup_0\"></a>使用BeautifulSoup爬取网页内容并存入数据库案例</h1>\n<h2><a id=\"Python_2\"></a>学习了Python网络爬虫，完成里一个比较完整的爬虫案例与大家分享</h2>\n<p>爬取地址：<a href=\"http://www.tipdm.com/cpzx/index.jhtml\" rel=\"nofollow\">http://www.tipdm.com/cpzx/index.jhtml</a><br>\n任务：爬取网页中产品中心的小标题、简介、超链接，存入数据库<br>\n数据库使用的是Mysql，直接使用代码创建数据库数据表以及插入数据</p>\n<h3><a id=\"_6\"></a>长篇短写，代码如下：</h3>\n<pre><code class=\"prism language-python\"><span class=\"token keyword\">import</span> requests\n<span class=\"token keyword\">import</span> pymysql\n<span class=\"token keyword\">from</span> bs4 <span class=\"token keyword\">import</span> BeautifulSoup\n\n\n<span class=\"token keyword\">def</span> <span class=\"token function\">get_html_text</span><span class=\"token punctuation\">(</span>url<span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    headers <span class=\"token operator\">=</span> <span class=\"token punctuation\">{</span>\n        <span class=\"token string\">'user-agent'</span><span class=\"token punctuation\">:</span> <span class=\"token string\">'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'</span>\n    <span class=\"token punctuation\">}</span>\n    html_resutl <span class=\"token operator\">=</span> requests<span class=\"token punctuation\">.</span>get<span class=\"token punctuation\">(</span>url<span class=\"token operator\">=</span>url<span class=\"token punctuation\">,</span> headers<span class=\"token operator\">=</span>headers<span class=\"token punctuation\">)</span>\n    <span class=\"token keyword\">return</span> html_resutl<span class=\"token punctuation\">.</span>text\n\n\n<span class=\"token keyword\">def</span> <span class=\"token function\">get_title_link_intro</span><span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    result_list <span class=\"token operator\">=</span> <span class=\"token builtin\">list</span><span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n    <span class=\"token keyword\">for</span> i <span class=\"token keyword\">in</span> <span class=\"token builtin\">range</span><span class=\"token punctuation\">(</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n        result_bs <span class=\"token operator\">=</span> BeautifulSoup<span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">[</span>i<span class=\"token punctuation\">]</span><span class=\"token punctuation\">,</span><span class=\"token string\">'lxml'</span><span class=\"token punctuation\">)</span>\n        search_con <span class=\"token operator\">=</span> result_bs<span class=\"token punctuation\">.</span>select<span class=\"token punctuation\">(</span><span class=\"token string\">'#t248 &gt; div &gt; div.con'</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">for</span> i_con <span class=\"token keyword\">in</span> search_con<span class=\"token punctuation\">:</span>\n            result_list<span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span><span class=\"token punctuation\">[</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">)</span>\n            result_list<span class=\"token punctuation\">[</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>result_list<span class=\"token punctuation\">)</span> <span class=\"token operator\">-</span> <span class=\"token number\">1</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>i_con<span class=\"token punctuation\">.</span>select<span class=\"token punctuation\">(</span><span class=\"token string\">'h1&gt;a'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">[</span><span class=\"token number\">0</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>text<span class=\"token punctuation\">)</span>\n            result_list<span class=\"token punctuation\">[</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>result_list<span class=\"token punctuation\">)</span> <span class=\"token operator\">-</span> <span class=\"token number\">1</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>i_con<span class=\"token punctuation\">.</span>select<span class=\"token punctuation\">(</span><span class=\"token string\">'div'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">[</span><span class=\"token number\">0</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>text<span class=\"token punctuation\">)</span>\n            result_list<span class=\"token punctuation\">[</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>result_list<span class=\"token punctuation\">)</span> <span class=\"token operator\">-</span> <span class=\"token number\">1</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>i_con<span class=\"token punctuation\">.</span>select<span class=\"token punctuation\">(</span><span class=\"token string\">'h1&gt;a'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">[</span><span class=\"token number\">0</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>attrs<span class=\"token punctuation\">[</span><span class=\"token string\">'href'</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">)</span>\n    <span class=\"token keyword\">return</span> result_list\n\n\n<span class=\"token keyword\">def</span> <span class=\"token function\">connect_mysql</span><span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    <span class=\"token keyword\">try</span><span class=\"token punctuation\">:</span>\n        <span class=\"token keyword\">import</span> pymysql\n        connect <span class=\"token operator\">=</span> pymysql<span class=\"token punctuation\">.</span>connect<span class=\"token punctuation\">(</span>host<span class=\"token operator\">=</span><span class=\"token string\">'localhost'</span><span class=\"token punctuation\">,</span> user<span class=\"token operator\">=</span><span class=\"token string\">'root'</span><span class=\"token punctuation\">,</span> password<span class=\"token operator\">=</span><span class=\"token string\">'795247'</span><span class=\"token punctuation\">,</span> port<span class=\"token operator\">=</span><span class=\"token number\">3306</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">print</span><span class=\"token punctuation\">(</span><span class=\"token string\">'连接数据库成功'</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">return</span> connect\n    <span class=\"token keyword\">except</span><span class=\"token punctuation\">:</span>\n        <span class=\"token keyword\">print</span><span class=\"token punctuation\">(</span><span class=\"token string\">'连接数据库失败'</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">return</span> <span class=\"token boolean\">None</span>\n\n\n<span class=\"token keyword\">def</span> <span class=\"token function\">mk_DB_base</span><span class=\"token punctuation\">(</span>connect<span class=\"token punctuation\">:</span> pymysql<span class=\"token punctuation\">.</span>connect<span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    cursor <span class=\"token operator\">=</span> connect<span class=\"token punctuation\">.</span>cursor<span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n    sql_crdDB_newdb <span class=\"token operator\">=</span> <span class=\"token string\">'CREATE DATABASE IF NOT EXISTS pzkdb'</span>\n    sql_use <span class=\"token operator\">=</span> <span class=\"token string\">'USE pzkdb;'</span>\n    sql_crdTB_products <span class=\"token operator\">=</span> <span class=\"token triple-quoted-string string\">'''CREATE TABLE IF NOT EXISTS products(\n                             `title` varchar(255) ,\n                             `intro` varchar(255)  NULL DEFAULT NULL,\n                             `link` varchar(255)  NULL DEFAULT NULL,\n                             primary key (`title`)\n                            ) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;\n                            '''</span>\n    cursor<span class=\"token punctuation\">.</span>execute<span class=\"token punctuation\">(</span>sql_crdDB_newdb<span class=\"token punctuation\">)</span>\n    cursor<span class=\"token punctuation\">.</span>execute<span class=\"token punctuation\">(</span>sql_use<span class=\"token punctuation\">)</span>\n    cursor<span class=\"token punctuation\">.</span>execute<span class=\"token punctuation\">(</span>sql_crdTB_products<span class=\"token punctuation\">)</span>\n    connect<span class=\"token punctuation\">.</span>commit<span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n    <span class=\"token keyword\">return</span> <span class=\"token boolean\">True</span>\n\n\n<span class=\"token keyword\">def</span> <span class=\"token function\">into_sql</span><span class=\"token punctuation\">(</span>connect<span class=\"token punctuation\">:</span> pymysql<span class=\"token punctuation\">.</span>connect<span class=\"token punctuation\">,</span> values_list<span class=\"token punctuation\">,</span> table_name<span class=\"token operator\">=</span><span class=\"token string\">'products'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    <span class=\"token comment\"># 存储数据结构为二维列表，存储在数据库的二维表中</span>\n    <span class=\"token comment\"># 传入一位数据元组,values_list</span>\n    <span class=\"token comment\"># 元组结构：Title(标题）,intro(内容）,link（对应标题链接）</span>\n    <span class=\"token keyword\">try</span><span class=\"token punctuation\">:</span>\n        cursor <span class=\"token operator\">=</span> connect<span class=\"token punctuation\">.</span>cursor<span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n        sql_insert <span class=\"token operator\">=</span> <span class=\"token string\">'insert into %s values(%s,%s,%s)'</span> <span class=\"token operator\">%</span> <span class=\"token punctuation\">(</span>table_name<span class=\"token punctuation\">,</span> <span class=\"token string\">'%s'</span><span class=\"token punctuation\">,</span> <span class=\"token string\">'%s'</span><span class=\"token punctuation\">,</span> <span class=\"token string\">'%s'</span><span class=\"token punctuation\">)</span>\n        cursor<span class=\"token punctuation\">.</span>execute<span class=\"token punctuation\">(</span>sql_insert<span class=\"token punctuation\">,</span> values_list<span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">print</span><span class=\"token punctuation\">(</span><span class=\"token string\">'插入一条数据：'</span><span class=\"token punctuation\">,</span> values_list<span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">return</span> <span class=\"token boolean\">True</span>\n    <span class=\"token keyword\">except</span> pymysql<span class=\"token punctuation\">.</span>err<span class=\"token punctuation\">.</span>IntegrityError<span class=\"token punctuation\">:</span>\n        <span class=\"token keyword\">print</span><span class=\"token punctuation\">(</span><span class=\"token string\">'该数据已存在'</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">return</span> <span class=\"token boolean\">None</span>\n    <span class=\"token keyword\">except</span><span class=\"token punctuation\">:</span>\n        <span class=\"token keyword\">return</span> <span class=\"token boolean\">None</span>\n\n\n<span class=\"token keyword\">def</span> <span class=\"token function\">into_list</span><span class=\"token punctuation\">(</span>connect<span class=\"token punctuation\">:</span> pymysql<span class=\"token punctuation\">.</span>connect<span class=\"token punctuation\">,</span> values_list<span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    <span class=\"token comment\"># 存储数据结构为二维列表，存储在数据库的二维表中</span>\n    <span class=\"token comment\"># 传入一位数据元组,values_list</span>\n    <span class=\"token comment\"># 元组结构：Title(标题）,intro(内容）,link（对应标题链接）</span>\n    <span class=\"token keyword\">try</span><span class=\"token punctuation\">:</span>\n        <span class=\"token keyword\">for</span> index_list <span class=\"token keyword\">in</span> values_list<span class=\"token punctuation\">:</span>\n            into_sql<span class=\"token punctuation\">(</span>connect<span class=\"token operator\">=</span>connect<span class=\"token punctuation\">,</span> values_list<span class=\"token operator\">=</span>index_list<span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">print</span><span class=\"token punctuation\">(</span><span class=\"token string\">'数据插入完成'</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">return</span> <span class=\"token boolean\">True</span>\n    <span class=\"token keyword\">except</span><span class=\"token punctuation\">:</span>\n        <span class=\"token keyword\">return</span> <span class=\"token boolean\">None</span>\n\n\n<span class=\"token keyword\">if</span> __name__ <span class=\"token operator\">==</span> <span class=\"token string\">'__main__'</span><span class=\"token punctuation\">:</span>\n    html_text_list<span class=\"token operator\">=</span><span class=\"token punctuation\">[</span><span class=\"token punctuation\">]</span>\n    <span class=\"token keyword\">for</span> i <span class=\"token keyword\">in</span> <span class=\"token builtin\">range</span><span class=\"token punctuation\">(</span><span class=\"token number\">1</span><span class=\"token punctuation\">,</span> <span class=\"token number\">5</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n        html_text_list<span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>get_html_text<span class=\"token punctuation\">(</span>url<span class=\"token operator\">=</span><span class=\"token string\">'http://www.tipdm.com/cpzx/index_'</span> <span class=\"token operator\">+</span> <span class=\"token builtin\">str</span><span class=\"token punctuation\">(</span>i<span class=\"token punctuation\">)</span> <span class=\"token operator\">+</span> <span class=\"token string\">'.jhtml'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span>\n    result <span class=\"token operator\">=</span> get_title_link_intro<span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">)</span>\n    connect <span class=\"token operator\">=</span> connect_mysql<span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n    mk_DB_base<span class=\"token punctuation\">(</span>connect<span class=\"token punctuation\">)</span>\n    into_list<span class=\"token punctuation\">(</span>connect<span class=\"token punctuation\">,</span> result<span class=\"token punctuation\">)</span>\n    connect<span class=\"token punctuation\">.</span>commit<span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n\n</code></pre>\n<p><strong>这边也可以使用Xpath的方式进行切片爬取：</strong></p>\n<pre><code class=\"prism language-python\"><span class=\"token keyword\">def</span> <span class=\"token function\">get_title_link_intro</span><span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n    result_list <span class=\"token operator\">=</span> <span class=\"token builtin\">list</span><span class=\"token punctuation\">(</span><span class=\"token punctuation\">)</span>\n    <span class=\"token keyword\">for</span> i <span class=\"token keyword\">in</span> <span class=\"token builtin\">range</span><span class=\"token punctuation\">(</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">:</span>\n        result_lxml <span class=\"token operator\">=</span> etree<span class=\"token punctuation\">.</span>HTML<span class=\"token punctuation\">(</span>html_text_list<span class=\"token punctuation\">[</span>i<span class=\"token punctuation\">]</span><span class=\"token punctuation\">,</span> etree<span class=\"token punctuation\">.</span>HTMLParser<span class=\"token punctuation\">(</span>encoding<span class=\"token operator\">=</span><span class=\"token string\">'utf-8'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span>\n        search_con <span class=\"token operator\">=</span> result_lxml<span class=\"token punctuation\">.</span>xpath<span class=\"token punctuation\">(</span><span class=\"token string\">'//div[@class=\"con\"]'</span><span class=\"token punctuation\">)</span>\n        <span class=\"token keyword\">for</span> i_con <span class=\"token keyword\">in</span> search_con<span class=\"token punctuation\">:</span>\n            result_list<span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span><span class=\"token punctuation\">[</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">)</span>\n            result_list<span class=\"token punctuation\">[</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>result_list<span class=\"token punctuation\">)</span> <span class=\"token operator\">-</span> <span class=\"token number\">1</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>i_con<span class=\"token punctuation\">.</span>xpath<span class=\"token punctuation\">(</span><span class=\"token string\">'h1/a/text()'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span>\n            result_list<span class=\"token punctuation\">[</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>result_list<span class=\"token punctuation\">)</span> <span class=\"token operator\">-</span> <span class=\"token number\">1</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>i_con<span class=\"token punctuation\">.</span>xpath<span class=\"token punctuation\">(</span><span class=\"token string\">'div/text()'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span>\n            result_list<span class=\"token punctuation\">[</span><span class=\"token builtin\">len</span><span class=\"token punctuation\">(</span>result_list<span class=\"token punctuation\">)</span> <span class=\"token operator\">-</span> <span class=\"token number\">1</span><span class=\"token punctuation\">]</span><span class=\"token punctuation\">.</span>append<span class=\"token punctuation\">(</span>i_con<span class=\"token punctuation\">.</span>xpath<span class=\"token punctuation\">(</span><span class=\"token string\">'h1/a/@href'</span><span class=\"token punctuation\">)</span><span class=\"token punctuation\">)</span>\n    <span class=\"token keyword\">return</span> result_list\n</code></pre>\n<p><strong>个人感觉Xpath比较好理解一些</strong></p>\n<h4><a id=\"PS_122\"></a>PS：正作为一名大数据技术方向的大学生学习中,我也会把我学习中完成的一些任务实训发表出来和大家一起学习，还请大家请多多指教。</h4>\n\n      \n  </div>\n    <div class=\"postTime\">\n        文章最后发布于: 2019-12-15    </div>\n  <script src=\"https://csdnimg.cn/public/common/libs/jquery/jquery-1.9.1.min.js\" type=\"text/javascript\"></script>\n    <script>\n    if(window.location.search){\n      $('#goto_vip_btn').attr('href','https://app.csdn.net/vip/mall' + window.location.search )\n    }\n  </script>\n  <script>\n    // 解决安卓手机无法展示MD文章超长代码片问题\n    $(function(){\n      $(\"div.markdown_views\").find('pre code').each(function(idx, el){\n        el = $(el)\n        var newDiv = $('<div></div>')\n        newDiv.addClass(el.attr('class'))\n        newDiv.html(el.html())\n        newDiv.css({\n          position: 'relative'\n        })\n        el.html('')\n        el.append(newDiv)\n        el.removeAttr('class')\n      })\n    })\n  </script>\n    <script  type=\"text/javascript\">\n    $(function () {\n      var observer = lozad('.article_content img', {\n          rootMargin: '10px 0px', // syntax similar to that of CSS Margin\n          threshold: 0.1 // ratio of element convergence\n      });\n      observer.observe();\n      if($('div.markdown_views pre.prettyprint code.hljs').length > 0 ){\n        $('div.markdown_views')[0].className = 'markdown_views ';\n      }\n      dp.SyntaxHighlighter.HighlightAll('pre');\n    });\n  </script>\n    <script type=\"text/javascript\" src=\"https://csdnimg.cn/release/phoenix/production/markdownCopy-626b79893b.js\"></script>\n  <script  type=\"text/javascript\">\n      $(function () {\n        $(\".MathJax\").remove();\n        dp.SyntaxHighlighter.HighlightAll('pre');\n      });\n  </script> \n  <script type=\"text/x-mathjax-config\">\n      MathJax.Hub.Config({\n        \"HTML-CSS\": {\n            linebreaks: { automatic: true, width: \"94%container\" },\n            imageFont: null\n        },\n        tex2jax: {\n          preview: \"none\"\n        },\n        mml2jax: {\n          preview: 'none'\n        },\n        messageStyle: \"none\"\n    });\n  </script>\n  <script type=\"text/javascript\" src=\"https://csdnimg.cn/release/blog_mathjax/MathJax.js?config=TeX-AMS-MML_HTMLorMML\"></script>\n  <script type=\"text/javascript\" src=\"https://csdnimg.cn/release/phoenix/production/app_blog_article-24440deba9.js\"></script>\n  <script>\n      $.get(\"https://blog.csdn.net/weixin_43606442/app/article/uvc/103547561\");\n  </script></div>\n<script>\n  $(function(){\n    var imgList = $(\"#content_views\").find(\"img\");\n    setTimeout(function(){\n      var imgFirst = imgList.eq(0)\n      var imgUrl = imgFirst.attr(\"src\");\n      var imgDataUrl = imgFirst.data(\"src\");\n      if(imgUrl != imgDataUrl){\n        imgList.each(function(idx, el) {\n          $(el).attr('src', $(el).data(\"src\"))\n        })\n      }\n    }, 1500);\n  })\n</script>\n</body>\n</html>\n", "text/html", "utf-8", null);
        NBSAppInstrumentation.activityCreateEndIns();
    }

    @Override // android.support.v7.app.AppCompatActivity, android.app.Activity, android.view.KeyEvent.Callback
    public boolean onKeyDown(int i, KeyEvent keyEvent) {
        NBSActionInstrumentation.onKeyDownAction(i, getClass().getName());
        return super.onKeyDown(i, keyEvent);
    }

    @Override // android.app.Activity
    protected void onRestart() {
        NBSAppInstrumentation.activityRestartBeginIns(getClass().getName());
        super.onRestart();
        NBSAppInstrumentation.activityRestartEndIns();
    }

    @Override // net.csdn.csdnplus.activity.BaseActivity, android.support.v4.app.FragmentActivity, android.app.Activity
    public void onResume() {
        NBSAppInstrumentation.activityResumeBeginIns(getClass().getName());
        super.onResume();
        NBSAppInstrumentation.activityResumeEndIns();
    }

    @Override // android.support.v7.app.AppCompatActivity, android.support.v4.app.FragmentActivity, android.app.Activity
    public void onStart() {
        NBSApplicationStateMonitor.getInstance().activityStarted(getClass().getName());
        super.onStart();
        NBSAppInstrumentation.activityStartEndIns();
    }

    @Override // android.support.v7.app.AppCompatActivity, android.support.v4.app.FragmentActivity, android.app.Activity
    public void onStop() {
        NBSApplicationStateMonitor.getInstance().activityStopped(getClass().getName());
        super.onStop();
    }
}
