{"id":1185,"date":"2024-11-16T19:14:33","date_gmt":"2024-11-16T11:14:33","guid":{"rendered":"https:\/\/gemmartdesign.com\/?p=1185"},"modified":"2024-11-29T20:29:15","modified_gmt":"2024-11-29T12:29:15","slug":"python-post11","status":"publish","type":"post","link":"https:\/\/gemmartdesign.com\/?p=1185","title":{"rendered":"Python\u554f\u984c\u7d00\u9304 #11-Web Crawler\u8a2a\u554f\u7db2\u7ad9"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">\u672c\u5468\u76ee\u6a19<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b78\u7fd2WebCrawler\u57fa\u672c\u7528\u6cd5\u8207\u61c9\u7528<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4efb\u52d9<\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u5982\u4f55\u8a2a\u554f\u7db2\u7ad9\u52a0\u5165\u700f\u89bd\u5668\u8cc7\u8a0a\u4e26\u53d6\u5f97\u7db2\u7ad9\u8cc7\u6599<\/li>\n\n\n\n<li>\u4f7f\u7528Beautifulsoup\u6293\u53d6\u7db2\u7ad9\u6a19\u984c<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u5c08\u6848\u7df4\u7fd2<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">\u9047\u5230\u554f\u984c<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5df2\u7d93\u52a0\u5165\u700f\u89bd\u5668\u8cc7\u6599\u907f\u514d\u88ab\u6a5f\u5668\u4eba\u963b\u64cb\uff0c\u4f46\u8cc7\u6599\u4f9d\u7136\u8dd1\u4e0d\u51fa\u4f86<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import urllib.request as req\nurl=\"https:\/\/www.ptt.cc\/bbs\/movie\/index.html\"\nrequest=req.Request(url, headers={\n    \"user-agent\":\"Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/130.0.0.0 Safari\/537.36\"\n})\nwith req.urlopen(url) as response:\n    data=response.read().decode(\"utf-8\")\nprint(data)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">Log<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u932f\u8aa4\u8a0a\u606f: \u4e00\u6a23\u8df3\u51faForbidden<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>urllib.error.HTTPError: HTTP Error 403: Forbidden<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">\u6aa2\u67e5\u539f\u59cb\u78bc\u767c\u73fe:<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">with req.urlopen(url) as response:<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">url\u6c92\u6709\u6539\u6210<strong>request<\/strong>\uff0c\u4fee\u6b63\u5f8c\u53ef\u6293\u5230<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u4fee\u6b63\u5f8c\u539f\u59cb\u78bc<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>import urllib.request as req\nurl=\"https:\/\/www.ptt.cc\/bbs\/movie\/index.html\"\nrequest=req.Request(url, headers={\n    \"user-agent\":\"Mozilla\/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/130.0.0.0 Safari\/537.36\"\n})\nwith req.urlopen(request) as response:\n    data=response.read().decode(\"utf-8\")\nprint(data)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">Output<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\u7a0b\u5f0f\u78bc\u592a\u591a\u4e0d\u8907\u88fd\u8cbc\u4e0a<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u672c\u5468\u76ee\u6a19 \u5b78\u7fd2WebCrawler\u57fa\u672c\u7528\u6cd5\u8207\u61c9\u7528 \u4efb\u52d9 \u5c08\u6848&hellip;<\/p>\n","protected":false},"author":1,"featured_media":1233,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"content-type":"","pagelayer_contact_templates":[],"_pagelayer_content":"","_lmt_disableupdate":"","_lmt_disable":"","footnotes":""},"categories":[9],"tags":[],"class_list":["post-1185","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-python"],"_links":{"self":[{"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/posts\/1185","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1185"}],"version-history":[{"count":7,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/posts\/1185\/revisions"}],"predecessor-version":[{"id":1235,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/posts\/1185\/revisions\/1235"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=\/wp\/v2\/media\/1233"}],"wp:attachment":[{"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1185"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1185"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gemmartdesign.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1185"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}