您现在的位置是:首页 > 文章详情

我自己开发的工具,打印出百度贴吧某用户发表过的所有帖子

日期:2020-01-24点击:288
<html> <meta charset="UTF-8"/> <style> a { color: green; font-family: arial; font-weight: bold } </style> <body> <div id="container"></div> </body> <script src="jquery1.7.1.js"> /* Jerry 2017-02-06 14:58PM update should use C:\MyApp\Chrome\Application\chrome.exe --user-data-dir="C:/yaas" --disable-web-security and then FIRST LOG ON BAIDU successfully!!!! */ </script> <script> /* Jerry 2017-02-05 5:54PM 这个警告的意思是说:请求的资源可能会被(扩展/或其他什么机制)屏蔽掉。 之所以会出现这个警告,是因为去获取该资源的请求其实并(还)没有真的发生,所以 Header 里显示的是伪信息,直到服务器真的有响应返回,这里的 Header 信息才会被更新为真实的。不过这一切也可能不会发生,因为该请求可能会被屏蔽。比如说 AdBlock 什么的,当然了不全是浏览器扩展,具体情况具体分析了。 对了,别忘了用 chrome://net-internals 来帮助你查找被屏蔽的请求以及可能的原因。 */ var PREFIX = "http://tieba.baidu.com"; var START = "http://tieba.baidu.com/i/i/my_tie"; //var START = "http://www.baidu.com"; var POST = {}; var TOTAL = 0; var SORTED = []; function getTotalCount(collection){ var count = 0; for( bar in collection){ if( !collection.hasOwnProperty(bar)) continue; var postList = collection[bar]; count += postList.length; } return count; } function shouldEnd(previousCount) { TOTAL = getTotalCount(POST); console.log("pre: " + previousCount + " total: " + TOTAL); return ( previousCount == TOTAL ); } function main() { var html = getPostByAJAX(START); handleLiChildren(html); var page = 2; while(1){ var prevCount = getTotalCount(POST); var task = START + "?&pn=" + page; var html = getPostByAJAX(task); handleLiChildren(html); page++; /* if( page >=2 ) break;*/ if( shouldEnd(prevCount) ) break; } sort(); generate(); } function handleLiChildren(resultString){ var htmlDom = $(resultString); var liChildren = $("li", htmlDom); $.each( liChildren, function(i, value) { // if( value.className.indexOf("nav_item") != -1 ) if( value.className) return true; if( value.innerText == "我回复的" || value.innerText == "我的精品") return true; var detail = parseDetail(value); insertPost(detail); }); } /* <ul> <li> <cite>2016</cite> <a href="/f?kw=%E5%A4%A7%E9%82%91" >尿素氮</a> </li> <li> <cite>2015</cite> <a href="/f?kw=%E5%A4%A7%E9%82%91" >尿素氮2</a> </li> </ul> */ function getpostSource(post) { var source = "<li><cite>"; source += post.date + "/<cite>"; source += '<a href="' + post.url + '">' + post.postTitle + "</a></li>"; return source; } function getBarPostsSource(barName, posts) { var source = '<h1>' + barName + ': ' + posts.length + '个</h1>'; source += "<ul>"; for( var i = 0; i < posts.length; i++){ var post = posts[i]; source += getpostSource(post); } source += "</ul>"; return source; } function sortNumber(a,b){ return b.size - a.size; } function sort() { for( barName in POST) { if( !POST.hasOwnProperty(barName)) continue; var post = { name: barName, size: POST[barName].length }; SORTED.push(post); } SORTED.sort(sortNumber); } function generate(){ var div = document.getElementById("container"); var source = "总共帖子: " + TOTAL + "个"; for( var i = 0; i < SORTED.length; i++){ var posts = POST[SORTED[i].name]; source += getBarPostsSource(SORTED[i].name, posts); } div.innerHTML = source; } $(function(){ main(); }); function getPostByAJAX(requestURL){ var html = $.ajax({ url: requestURL, xhrFields: { // The 'xhrFields' property sets additional fields on the XMLHttpRequest. // This can be used to set the 'withCredentials' property. // Set the value to 'true' if you'd like to pass cookies to the server. // If this is enabled, your server must respond with the header // 'Access-Control-Allow-Credentials: true'. withCredentials: true }, async: false}).responseText; debugger; return html; } /* function getPostByAJAX(requestURL){ var settings = { type: "GET", crossOrigin: true, url:requestURL, error: function(XHR,textStatus,errorThrown) { alert ("XHR="+XHR+"\ntextStatus="+textStatus+"\nerrorThrown=" + errorThrown); }, success: function(data,textStatus) { debugger; }, headers: { "Access-Control-Allow-Origin":"http://tieba.baidu.com", "Access-Control-Allow-Headers":"X-Requested-With" } }; $.ajax(settings); } */ /* function getPostByAJAX(requestURL){ var html = $.ajax({ url: requestURL, dataType:"jsonp", xhrFields: { // The 'xhrFields' property sets additional fields on the XMLHttpRequest. // This can be used to set the 'withCredentials' property. // Set the value to 'true' if you'd like to pass cookies to the server. // If this is enabled, your server must respond with the header // 'Access-Control-Allow-Credentials: true'. withCredentials: true }, async: false}).responseText; return html; } */ function insertPost(postDetail){ if( !POST[postDetail.barName]){ POST[postDetail.barName] = []; } POST[postDetail.barName].push(postDetail); } function parseDetail(liNode) { var cite = $("cite", liNode); var date = cite[0].innerHTML; // value1 var tds = $("td", liNode); var a1 = $("a", tds[0]); var barName = a1[0].innerHTML; // value2 var a2 = $("a", tds[1]); var postTitle = a2[0].innerHTML; // value3 var url = PREFIX + a2.attr("href"); return { date: date, barName: barName, postTitle: postTitle, url: url } } function getTestData(){ return '<!DOCTYPE html><html><body><div class="wrap1"><div class="wrap2"><div ' + ' id="main_wrapper" class="main_wrapper"><div id="main_back_img"><div ' + ' id="main_back_bottom"><div id="container" class="ibody clearfix"><div><div ' + ' id="content"><div class="simple_block_container"><ul><li><cite>2-16</cite>' + '<div class="wrap_container"><table><tr><td class="nowrap">在<a style="" ' + ' href="/f?kw=%E5%A4%A7%E9%82%91" target="_blank">ANDROID吧</a> 发贴</td><td class="wrap">' + '<a href="/p/4356641476?pid=84106363194&amp;cid=0#841063631" class="thread_title" target="_blank">硬盘</a></td>' + '</tr></table></div><div class="clear"></div></li>' + '<li></li><li></li></ul></div></div></div></div></div></div></div></div></body></html>'; } </script> </html>

本文来自云栖社区合作伙伴“汪子熙”,了解相关信息可以关注微信公众号"汪子熙"。

原文链接:https://yq.aliyun.com/articles/743212
关注公众号

低调大师中文资讯倾力打造互联网数据资讯、行业资源、电子商务、移动互联网、网络营销平台。

持续更新报道IT业界、互联网、市场资讯、驱动更新,是最及时权威的产业资讯及硬件资讯报道平台。

转载内容版权归作者及来源网站所有,本站原创内容转载请注明来源。

文章评论

共有0条评论来说两句吧...

文章二维码

扫描即可查看该文章

点击排行

推荐阅读

最新文章