首页 文章 精选 留言 我的

精选列表

搜索[加密工具],共10000篇文章
优秀的个人博客,低调大师

我自己开发的工具,打印出百度贴吧某用户发表过的所有帖子

<html>
<meta charset="UTF-8"/>
<style>
a {
  color: green;
  font-family: arial;
  font-weight: bold
}
</style>
<body>
<div id="container"></div>
</body>
<script src="jquery1.7.1.js">
/* Jerry 2017-02-06 14:58PM update
   should use
   C:\MyApp\Chrome\Application\chrome.exe --user-data-dir="C:/yaas" --disable-web-security
   and then FIRST LOG ON BAIDU successfully!!!! */
</script>
<script>
/*
 * Lists every post published by the logged-in Baidu Tieba user, grouped by
 * bar (forum) and ordered by the number of posts per bar.
 *
 * The page fetches START page by page with synchronous, credentialed AJAX
 * requests, extracts one record per <li> of the returned HTML and finally
 * renders the result into <div id="container">.
 *
 * Jerry 2017-02-05 5:54PM - note about a browser warning seen while
 * developing (presumably Chrome DevTools' provisional-headers warning; the
 * original note does not name it): the warning means the requested resource
 * may be blocked by an extension or some other mechanism. It shows up because
 * the request has not really been sent yet, so the headers displayed are
 * placeholders until the server actually responds. That may never happen if
 * the request is blocked, e.g. by AdBlock - not only by extensions, so check
 * case by case. Use chrome://net-internals to find blocked requests and the
 * likely reason.
 *
 * Two earlier, commented-out variants of getPostByAJAX were removed: one sent
 * "Access-Control-Allow-*" as request headers (those are response headers in
 * CORS), the other used dataType "jsonp".
 */

// Prefix prepended to the relative hrefs found in the fetched page.
var PREFIX = "http://tieba.baidu.com";
// First page of the "my posts" listing; later pages append "?&pn=<page>".
var START = "http://tieba.baidu.com/i/i/my_tie";
// Bar name -> array of post detail objects (see parseDetail).
var POST = {};
// Total number of collected posts; refreshed by shouldEnd().
var TOTAL = 0;
// Array of { name, size } entries, one per bar, filled and ordered by sort().
var SORTED = [];

/**
 * Counts the posts stored in a bar-name -> post-array map.
 * @param {Object} collection map whose own properties are arrays
 * @returns {number} sum of the lengths of all own-property arrays
 */
function getTotalCount(collection) {
  var count = 0;
  // "var" keeps the loop variable local; the original leaked a global "bar".
  for (var bar in collection) {
    if (!collection.hasOwnProperty(bar)) continue;
    count += collection[bar].length;
  }
  return count;
}

/**
 * Refreshes TOTAL and tells whether the last fetched page added nothing.
 * @param {number} previousCount post count before the last page was handled
 * @returns {boolean} true when no new post was collected
 */
function shouldEnd(previousCount) {
  TOTAL = getTotalCount(POST);
  console.log("pre: " + previousCount + " total: " + TOTAL);
  return previousCount === TOTAL;
}

/**
 * Entry point: fetches the listing page by page until a page contributes no
 * new post, then sorts the bars and renders the result.
 */
function main() {
  handleLiChildren(getPostByAJAX(START));
  var page = 2;
  while (true) {
    var prevCount = getTotalCount(POST);
    handleLiChildren(getPostByAJAX(START + "?&pn=" + page));
    page++;
    if (shouldEnd(prevCount)) break;
  }
  sort();
  generate();
}

/**
 * Parses one fetched HTML page and stores every post found in its <li> items.
 * @param {string} resultString HTML text of a listing page
 */
function handleLiChildren(resultString) {
  var htmlDom = $(resultString);
  var liChildren = $("li", htmlDom);
  $.each(liChildren, function (i, value) {
    // <li> elements carrying a class are skipped (the original code hints
    // that these are navigation items such as "nav_item").
    if (value.className) return true;
    if (value.innerText == "我回复的" || value.innerText == "我的精品") return true;
    var detail = parseDetail(value);
    // parseDetail yields null for items that do not look like a post,
    // e.g. the empty <li></li> entries in getTestData().
    if (detail) insertPost(detail);
  });
}

/**
 * Builds the markup for one post. Intended shape:
 *   <li><cite>DATE</cite><a href="URL">TITLE</a></li>
 * @param {Object} post record produced by parseDetail
 * @returns {string} HTML fragment
 */
function getpostSource(post) {
  var source = "<li><cite>";
  // Fixed: the original appended "/<cite>", which never closed the element.
  source += post.date + "</cite>";
  source += '<a href="' + post.url + '">' + post.postTitle + "</a></li>";
  return source;
}

/**
 * Builds the markup for one bar: a heading with the post count followed by
 * the list of its posts.
 * @param {string} barName name of the bar
 * @param {Array} posts post records of that bar
 * @returns {string} HTML fragment
 */
function getBarPostsSource(barName, posts) {
  var source = '<h1>' + barName + ': ' + posts.length + '个</h1>';
  source += "<ul>";
  for (var i = 0; i < posts.length; i++) {
    source += getpostSource(posts[i]);
  }
  source += "</ul>";
  return source;
}

/** Comparator ordering { size } entries from largest to smallest. */
function sortNumber(a, b) {
  return b.size - a.size;
}

/** Fills SORTED with one { name, size } entry per bar, largest bar first. */
function sort() {
  // "var" keeps the loop variable local; the original leaked a global "barName".
  for (var barName in POST) {
    if (!POST.hasOwnProperty(barName)) continue;
    SORTED.push({
      name: barName,
      size: POST[barName].length
    });
  }
  SORTED.sort(sortNumber);
}

/** Renders the grand total and every bar, in SORTED order, into #container. */
function generate() {
  var div = document.getElementById("container");
  var source = "总共帖子: " + TOTAL + "个";
  for (var i = 0; i < SORTED.length; i++) {
    var posts = POST[SORTED[i].name];
    source += getBarPostsSource(SORTED[i].name, posts);
  }
  div.innerHTML = source;
}

$(function () {
  main();
});

/**
 * Fetches a URL synchronously and returns the response body.
 * @param {string} requestURL URL to fetch
 * @returns {string} responseText of the request
 */
function getPostByAJAX(requestURL) {
  // The leftover "debugger;" statement of the original was removed; it paused
  // execution on every request whenever the developer tools were open.
  return $.ajax({
    url: requestURL,
    xhrFields: {
      // The 'xhrFields' property sets additional fields on the XMLHttpRequest.
      // This can be used to set the 'withCredentials' property.
      // Set the value to 'true' if you'd like to pass cookies to the server.
      // If this is enabled, your server must respond with the header
      // 'Access-Control-Allow-Credentials: true'.
      withCredentials: true
    },
    // main() consumes the result immediately, so the request is synchronous.
    async: false
  }).responseText;
}

/**
 * Appends a post record to the list of its bar, creating the list on first use.
 * @param {Object} postDetail record produced by parseDetail
 */
function insertPost(postDetail) {
  if (!POST[postDetail.barName]) {
    POST[postDetail.barName] = [];
  }
  POST[postDetail.barName].push(postDetail);
}

/**
 * Extracts a post record from one <li> of the listing page. The item is
 * expected to contain a <cite> (date) and two <td> cells whose first links
 * are the bar and the post (see getTestData for a sample).
 * @param {Element} liNode the <li> element
 * @returns {Object|null} { date, barName, postTitle, url }, or null when the
 *     item lacks the expected structure (the original threw a TypeError)
 */
function parseDetail(liNode) {
  var cite = $("cite", liNode);
  var tds = $("td", liNode);
  // Checked before looking up the links: with an undefined context jQuery
  // would search the whole document instead of the cell.
  if (cite.length === 0 || tds.length < 2) return null;
  var barLink = $("a", tds[0]);
  var postLink = $("a", tds[1]);
  if (barLink.length === 0 || postLink.length === 0) return null;
  return {
    date: cite[0].innerHTML,
    barName: barLink[0].innerHTML,
    postTitle: postLink[0].innerHTML,
    url: PREFIX + postLink.attr("href")
  };
}

/**
 * Returns a static sample of the listing page for offline experiments.
 * Not called by the page itself.
 * @returns {string} HTML text
 */
function getTestData() {
  return '<!DOCTYPE html><html><body><div class="wrap1"><div class="wrap2"><div ' +
    ' id="main_wrapper" class="main_wrapper"><div id="main_back_img"><div ' +
    ' id="main_back_bottom"><div id="container" class="ibody clearfix"><div><div ' +
    ' id="content"><div class="simple_block_container"><ul><li><cite>2-16</cite>' +
    '<div class="wrap_container"><table><tr><td class="nowrap">在<a style="" ' +
    ' href="/f?kw=%E5%A4%A7%E9%82%91" target="_blank">ANDROID吧</a> 发贴</td><td class="wrap">' +
    '<a href="/p/4356641476?pid=84106363194&amp;cid=0#841063631" class="thread_title" target="_blank">硬盘</a></td>' +
    '</tr></table></div><div class="clear"></div></li>' +
    '<li></li><li></li></ul></div></div></div></div></div></div></div></div></body></html>';
}
</script>
</html>
本文来自云栖社区合作伙伴“汪子熙”,了解相关信息可以关注微信公众号"汪子熙"。

优秀的个人博客,低调大师

[雪峰磁针石博客]selenium自动化测试工具python笔试面试项目实战5键盘操作

说明 本文参考答案基于Chrome,分辨率1920*1080,在其他环境表现可能会不同。本文代码地址 参考书籍下载: Learning Selenium Testing Tools with Python-2014.pdf Selenium自动化测试 基于 Python 语言 - 2018.pdf 上机实操: 在新的TAB打开链接 打开:https://china-testing.github.io/ 选择"数据分析"栏目的文章 按住"Ctrl"键点击"python"栏目的文章(在新的标签中打开) 切换到新的标签"python" 关闭新的标签"python" 关闭浏览器 参考答案 #!/usr/bin/python3 # -*- coding: utf-8 -*- # 讨论钉钉免费群21745728 qq群144081101 567351477 # CreateDate: 2018-10-17 import time from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.keys import Keys driver = webdriver.Chrome() driver.get("https://china-testing.github.io/") driver.implicitly_wait(30) driver.maximize_window() element = driver.find_element_by_link_text('数据分析') element.click() time.sleep(3) element = driver.find_element_by_link_text('python') ActionChains(driver).key_down(Keys.CONTROL).click(element).key_up( Keys.CONTROL).perform() time.sleep(3) driver.switch_to.window(driver.window_handles[1]) time.sleep(3) driver.close() # 关闭当前TAB time.sleep(3) driver.quit() 面试问答 1.driver.quit() 和 driver.close()有什么区别? 2.selenium中按下和松开键如何表示? 3.简述ActionChains类的作用? 上机实操: 验证悬浮提示内容 打开:http://jqueryui.com/tooltip/ 鼠标移动到上图的"Your age:" 确认悬浮提示内容为'We ask for your age only for statistical purposes.' 
关闭浏览器 参考答案 #!/usr/bin/python3 # -*- coding: utf-8 -*- # 讨论钉钉免费群21745728 qq群144081101 567351477 # CreateDate: 2018-10-17 import unittest import time from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions from selenium.webdriver.common.action_chains import ActionChains class ToolTipTest (unittest.TestCase): def setUp(self): self.driver = webdriver.Chrome() self.driver.get("http://jqueryui.com/tooltip/") self.driver.implicitly_wait(30) self.driver.maximize_window() def test_tool_tip(self): driver = self.driver frame_elm = driver.find_element_by_class_name('demo-frame') driver.switch_to.frame(frame_elm) time.sleep(3) age_field = driver.find_element_by_id('age') ActionChains(self.driver).move_to_element(age_field).perform() time.sleep(3) tool_tip_elm = WebDriverWait(self.driver, 10).until( expected_conditions.visibility_of_element_located(( By.CLASS_NAME, 'ui-tooltip-content'))) # verify tooltip message self.assertEqual('We ask for your age only for statistical purposes.', tool_tip_elm.text) time.sleep(3) def tearDown(self): self.driver.close() if __name__ == '__main__': unittest.main(verbosity=2) 面试问答 1.move_to_element()有什么用途? 
上机实操: 双击改变颜色 打开:http://api.jquery.com/dblclick/ 双击上图蓝色的框,把颜色改变成黄色 参考答案 #!/usr/bin/python3 # -*- coding: utf-8 -*- # 讨论钉钉免费群21745728 qq群144081101 567351477 # CreateDate: 2018-10-18 from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains import unittest class DoubleClickTest (unittest.TestCase): URL = 'http://api.jquery.com/dblclick/' def setUp(self): self.driver = webdriver.Chrome() self.driver.get(self.URL) self.driver.maximize_window() def test_double_click(self): driver = self.driver frame = driver.find_element_by_tag_name('iframe') driver.switch_to.frame(frame) box = driver.find_element_by_tag_name('div') # verify color is Blue self.assertEqual('rgba(0, 0, 255, 1)', box.value_of_css_property('background-color')) ActionChains(driver).move_to_element( driver.find_element_by_tag_name('body')).perform() ActionChains(driver).double_click(box).perform() # verify Color is Yellow self.assertEqual('rgba(255, 255, 0, 1)', box.value_of_css_property('background-color')) def tearDown(self): self.driver.close() if __name__ == '__main__': unittest.main(verbosity=2) 面试问答 1.double_click()有什么用途? 2.rgba的含义? 
上机实操: 拖拽元素 打开:http://jqueryui.com/resources/demos/droppable/default.html 拖动左边的框到右边 参考答案 #!/usr/bin/python3 # -*- coding: utf-8 -*- # 讨论钉钉免费群21745728 qq群144081101 567351477 # CreateDate: 2018-10-18 from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains import unittest class DragAndDropTest (unittest.TestCase): URL = 'http://jqueryui.com/resources/demos/droppable/default.html' def setUp(self) : self.driver = webdriver.Chrome() self.driver.get(self.URL) self.driver.maximize_window() def test_drag_and_drop(self): driver = self.driver source = driver.find_element_by_id('draggable') target = driver.find_element_by_id('droppable') ActionChains(self.driver).drag_and_drop(source, target).perform() self.assertEqual('Dropped!', target.text) def tearDown(self): self.driver.close() if __name__ == '__main__': unittest.main(verbosity=2) 面试问答 1.drag_and_drop()有什么用途? 参考资料 讨论 钉钉群21745728 qq群144081101 567351477 本文最新版本地址 本文涉及的python测试开发库 谢谢点赞! 本文相关海量书籍下载

优秀的个人博客,低调大师

[雪峰磁针石博客]数据分析工具pandas快速入门教程2-pandas数据结构

创建数据 Series和python的列表类似。DataFrame则类似值为Series的字典。 create.py #!/usr/bin/env python3 # -*- coding: utf-8 -*- # create.py import pandas as pd print("\n\n创建序列Series") s = pd.Series(['banana', 42]) print(s) print("\n\n指定索引index创建序列Series") s = pd.Series(['Wes McKinney', 'Creator of Pandas'], index=['Person', 'Who']) print(s) # 注意:列名未必为执行的顺序,通常为按字母排序 print("\n\n创建数据帧DataFrame") scientists = pd.DataFrame({ ' Name': ['Rosaline Franklin', 'William Gosset'], ' Occupation': ['Chemist', 'Statistician'], ' Born': ['1920-07-25', '1876-06-13'], ' Died': ['1958-04-16', '1937-10-16'], ' Age': [37, 61]}) print(scientists) print("\n\n指定顺序(index和columns)创建数据帧DataFrame") scientists = pd.DataFrame( data={'Occupation': ['Chemist', 'Statistician'], 'Born': ['1920-07-25', '1876-06-13'], 'Died': ['1958-04-16', '1937-10-16'], 'Age': [37, 61]}, index=['Rosaline Franklin', 'William Gosset'], columns=['Occupation', 'Born', 'Died', 'Age']) print(scientists) 执行结果: $ ./create.py 创建序列Series 0 banana 1 42 dtype: object 指定索引index创建序列Series Person Wes McKinney Who Creator of Pandas dtype: object 创建数据帧DataFrame Name Occupation Born Died Age 0 Rosaline Franklin Chemist 1920-07-25 1958-04-16 37 1 William Gosset Statistician 1876-06-13 1937-10-16 61 指定顺序(index和columns)创建数据帧DataFrame Occupation Born Died Age Rosaline Franklin Chemist 1920-07-25 1958-04-16 37 William Gosset Statistician 1876-06-13 1937-10-16 61 Series 官方文档:http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html Series的属性 属性 描述 loc 使用索引值获取子集 iloc 使用索引位置获取子集 dtype或dtypes 类型 T 转置 shape 数据的尺寸 size 元素的数量 values ndarray或类似ndarray的Series Series的方法 方法 描述 append 连接2个或更多系列 corr 计算与其他Series的关联 cov 与其他Series计算协方差 describe 计算汇总统计 drop_duplicates 返回一个没有重复项的Series equals Series是否具有相同的元素 get_values 获取Series的值,与values属性相同 hist 绘制直方图 min 返回最小值 max 返回最大值 mean 返回算术平均值 median 返回中位数 mode 返回众数 replace 用指定值替换系列中的值 sample 返回Series中值的随机样本 sort_values 排序 to_frame 转换为数据帧 transpose 返回转置 unique 返回唯一值组成的numpy.ndarray series.py #!/usr/bin/python3 # 
-*- coding: utf-8 -*- # CreateDate: 2018-3-14 # series.py import pandas as pd import numpy as np scientists = pd.DataFrame( data={'Occupation': ['Chemist', 'Statistician'], 'Born': ['1920-07-25', '1876-06-13'], 'Died': ['1958-04-16', '1937-10-16'], 'Age': [37, 61]}, index=['Rosaline Franklin', 'William Gosset'], columns=['Occupation', 'Born', 'Died', 'Age']) print(scientists) # 从数据帧(DataFrame)获取的行或者列为Series first_row = scientists.loc['William Gosset'] print(type(first_row)) print(first_row) # index和keys是一样的 print(first_row.index) print(first_row.keys()) print(first_row.values) print(first_row.index[0]) print(first_row.keys()[0]) # Pandas.Series和numpy.ndarray很类似 ages = scientists['Age'] print(ages) # 统计,更多参考http://pandas.pydata.org/pandas-docs/stable/basics.html#descriptive-statistics print(ages.mean()) print(ages.min()) print(ages.max()) print(ages.std()) scientists = pd.read_csv('../data/scientists.csv') ages = scientists['Age'] print(ages) print(ages.mean()) print(ages.describe()) print(ages[ages > ages.mean()]) print(ages > ages.mean()) manual_bool_values = [True, True, False, False, True, True, False, False] print(ages[manual_bool_values]) print(ages + ages) print(ages * ages) print(ages + 100) print(ages * 2) print(ages + pd.Series([1, 100])) # print(ages + np.array([1, 100])) 会报错,不同类型相加,大小一定要一样 print(ages + np.array([1, 100, 1, 100, 1, 100, 1, 100])) # 排序: 默认有自动排序 print(ages) rev_ages = ages.sort_index(ascending=False) print(rev_ages) print(ages * 2) print(ages + rev_ages) 执行结果 $ python3 series.py Occupation Born Died Age Rosaline Franklin Chemist 1920-07-25 1958-04-16 37 William Gosset Statistician 1876-06-13 1937-10-16 61 <class 'pandas.core.series.Series'> Occupation Statistician Born 1876-06-13 Died 1937-10-16 Age 61 Name: William Gosset, dtype: object Index(['Occupation', 'Born', 'Died', 'Age'], dtype='object') Index(['Occupation', 'Born', 'Died', 'Age'], dtype='object') ['Statistician' '1876-06-13' '1937-10-16' 61] Occupation Occupation Rosaline 
Franklin 37 William Gosset 61 Name: Age, dtype: int64 49.0 37 61 16.97056274847714 0 37 1 61 2 90 3 66 4 56 5 45 6 41 7 77 Name: Age, dtype: int64 59.125 count 8.000000 mean 59.125000 std 18.325918 min 37.000000 25% 44.000000 50% 58.500000 75% 68.750000 max 90.000000 Name: Age, dtype: float64 1 61 2 90 3 66 7 77 Name: Age, dtype: int64 0 False 1 True 2 True 3 True 4 False 5 False 6 False 7 True Name: Age, dtype: bool 0 37 1 61 4 56 5 45 Name: Age, dtype: int64 0 74 1 122 2 180 3 132 4 112 5 90 6 82 7 154 Name: Age, dtype: int64 0 1369 1 3721 2 8100 3 4356 4 3136 5 2025 6 1681 7 5929 Name: Age, dtype: int64 0 137 1 161 2 190 3 166 4 156 5 145 6 141 7 177 Name: Age, dtype: int64 0 74 1 122 2 180 3 132 4 112 5 90 6 82 7 154 Name: Age, dtype: int64 0 38.0 1 161.0 2 NaN 3 NaN 4 NaN 5 NaN 6 NaN 7 NaN dtype: float64 0 38 1 161 2 91 3 166 4 57 5 145 6 42 7 177 Name: Age, dtype: int64 0 37 1 61 2 90 3 66 4 56 5 45 6 41 7 77 Name: Age, dtype: int64 7 77 6 41 5 45 4 56 3 66 2 90 1 61 0 37 Name: Age, dtype: int64 0 74 1 122 2 180 3 132 4 112 5 90 6 82 7 154 Name: Age, dtype: int64 0 74 1 122 2 180 3 132 4 112 5 90 6 82 7 154 Name: Age, dtype: int64 数据帧(DataFrame) DataFrame是最常见的Pandas对象,可认为是Python存储类似电子表格的数据的方式。Series多常见功能都包含在DataFrame中。 子集的方法 注意ix现在已经不推荐使用。 DataFrame常用的索引操作有: 方式 描述 df[val] 选择单个列 df [[ column1, column2, ... 
]] 选择多个列 df.loc[val] 选择行 loc [[ label1 , label2 ,...]] | 选择多行 |df.loc[:, val] | 基于行index选择列 | df.loc[val1, val2] | 选择行列 |df.iloc[row number] | 基于行数选择行 | iloc [[ row1, row2, ...]] Multiple rows by row number | 基于行数选择多行 |df.iloc[:, where] | 选择列 | df.iloc[where_i, where_j] | 选择行列 |df.at[label_i, label_j] | 选择值 |df.iat[i, j] | 选择值 |reindex method | 通过label选择多行或列 |get_value, set_value | 通过label选择耽搁行或列 df[bool] | 选择行df [[ bool1, bool2, ...]] | 选择行df[ start :stop: step ] | 基于行数选择行 #!/usr/bin/python3 # -*- coding: utf-8 -*- # CreateDate: 2018-3-31 # df.py import pandas as pd import numpy as np scientists = pd.read_csv('../data/scientists.csv') print(scientists[scientists['Age'] > scientists['Age'].mean()]) first_half = scientists[: 4] second_half = scientists[ 4 :] print(first_half) print(second_half) print(first_half + second_half) print(scientists * 2) 执行结果 #!/usr/bin/python3 # -*- coding: utf-8 -*- # df.py import pandas as pd import numpy as np scientists = pd.read_csv('../data/scientists.csv') print(scientists[scientists['Age'] > scientists['Age'].mean()]) first_half = scientists[: 4] second_half = scientists[ 4 :] print(first_half) print(second_half) print(first_half + second_half) print(scientists * 2) 执行结果 $ python3 df.py Name Born Died Age Occupation 1 William Gosset 1876-06-13 1937-10-16 61 Statistician 2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse 3 Marie Curie 1867-11-07 1934-07-04 66 Chemist 7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician Name Born Died Age Occupation 0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist 1 William Gosset 1876-06-13 1937-10-16 61 Statistician 2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse 3 Marie Curie 1867-11-07 1934-07-04 66 Chemist Name Born Died Age Occupation 4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist 5 John Snow 1813-03-15 1858-06-16 45 Physician 6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist 7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician Name Born Died Age Occupation 0 NaN NaN 
NaN NaN NaN 1 NaN NaN NaN NaN NaN 2 NaN NaN NaN NaN NaN 3 NaN NaN NaN NaN NaN 4 NaN NaN NaN NaN NaN 5 NaN NaN NaN NaN NaN 6 NaN NaN NaN NaN NaN 7 NaN NaN NaN NaN NaN Name Born \ 0 Rosaline FranklinRosaline Franklin 1920-07-251920-07-25 1 William GossetWilliam Gosset 1876-06-131876-06-13 2 Florence NightingaleFlorence Nightingale 1820-05-121820-05-12 3 Marie CurieMarie Curie 1867-11-071867-11-07 4 Rachel CarsonRachel Carson 1907-05-271907-05-27 5 John SnowJohn Snow 1813-03-151813-03-15 6 Alan TuringAlan Turing 1912-06-231912-06-23 7 Johann GaussJohann Gauss 1777-04-301777-04-30 Died Age Occupation 0 1958-04-161958-04-16 74 ChemistChemist 1 1937-10-161937-10-16 122 StatisticianStatistician 2 1910-08-131910-08-13 180 NurseNurse 3 1934-07-041934-07-04 132 ChemistChemist 4 1964-04-141964-04-14 112 BiologistBiologist 5 1858-06-161858-06-16 90 PhysicianPhysician 6 1954-06-071954-06-07 82 Computer ScientistComputer Scientist 7 1855-02-231855-02-23 154 MathematicianMathematician 修改列 #!/usr/bin/python3 # -*- coding: utf-8 -*- # Author: xurongzhong#126.com wechat:pythontesting qq:37391319 # qq群:144081101 591302926 567351477 # CreateDate: 2018-06-07 # change.py import pandas as pd import numpy as np import random scientists = pd.read_csv('../data/scientists.csv') print(scientists['Born'].dtype) print(scientists['Died'].dtype) print(scientists.head()) # 转为日期 参考:https://docs.python.org/3.5/library/datetime.html born_datetime = pd.to_datetime(scientists['Born'], format='%Y-%m-%d') died_datetime = pd.to_datetime(scientists['Died'], format='%Y-%m-%d') # 增加列 scientists['born_dt'], scientists['died_dt'] = (born_datetime, died_datetime) print(scientists.shape) print(scientists.head()) random.seed(42) random.shuffle(scientists['Age']) # 此修改会作用于scientists print(scientists.head()) scientists['age_days_dt'] = (scientists['died_dt'] - scientists['born_dt']) print(scientists.head()) 执行结果: $ python3 change.py object object Name Born Died Age Occupation 0 Rosaline Franklin 1920-07-25 
1958-04-16 37 Chemist 1 William Gosset 1876-06-13 1937-10-16 61 Statistician 2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse 3 Marie Curie 1867-11-07 1934-07-04 66 Chemist 4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist (8, 7) Name Born Died Age Occupation born_dt \ 0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist 1920-07-25 1 William Gosset 1876-06-13 1937-10-16 61 Statistician 1876-06-13 2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse 1820-05-12 3 Marie Curie 1867-11-07 1934-07-04 66 Chemist 1867-11-07 4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist 1907-05-27 died_dt 0 1958-04-16 1 1937-10-16 2 1910-08-13 3 1934-07-04 4 1964-04-14 /usr/lib/python3.5/random.py:272: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy x[i], x[j] = x[j], x[i] Name Born Died Age Occupation born_dt \ 0 Rosaline Franklin 1920-07-25 1958-04-16 66 Chemist 1920-07-25 1 William Gosset 1876-06-13 1937-10-16 56 Statistician 1876-06-13 2 Florence Nightingale 1820-05-12 1910-08-13 41 Nurse 1820-05-12 3 Marie Curie 1867-11-07 1934-07-04 77 Chemist 1867-11-07 4 Rachel Carson 1907-05-27 1964-04-14 90 Biologist 1907-05-27 died_dt 0 1958-04-16 1 1937-10-16 2 1910-08-13 3 1934-07-04 4 1964-04-14 Name Born Died Age Occupation born_dt \ 0 Rosaline Franklin 1920-07-25 1958-04-16 66 Chemist 1920-07-25 1 William Gosset 1876-06-13 1937-10-16 56 Statistician 1876-06-13 2 Florence Nightingale 1820-05-12 1910-08-13 41 Nurse 1820-05-12 3 Marie Curie 1867-11-07 1934-07-04 77 Chemist 1867-11-07 4 Rachel Carson 1907-05-27 1964-04-14 90 Biologist 1907-05-27 died_dt age_days_dt 0 1958-04-16 13779 days 1 1937-10-16 22404 days 2 1910-08-13 32964 days 3 1934-07-04 24345 days 4 1964-04-14 20777 days 数据导入导出 out.py #!/usr/bin/python3 # -*- coding: utf-8 -*- # Author: china-testing#126.com wechat:pythontesting qq群:630011153 # CreateDate: 
2018-3-31 # out.py import pandas as pd import numpy as np import random scientists = pd.read_csv('../data/scientists.csv') names = scientists['Name'] print(names) names.to_pickle('../output/scientists_names_series.pickle') scientists.to_pickle('../output/scientists_df.pickle') # .p, .pkl, .pickle 是常用的pickle文件扩展名 scientist_names_from_pickle = pd.read_pickle('../output/scientists_df.pickle') print(scientist_names_from_pickle) names.to_csv('../output/scientist_names_series.csv') scientists.to_csv('../output/scientists_df.tsv', sep='\t') # 不输出行号 scientists.to_csv('../output/scientists_df_no_index.csv', index=None) # Series可以转为df再输出成excel文件 names_df = names.to_frame() names_df.to_excel('../output/scientists_names_series_df.xls') names_df.to_excel('../output/scientists_names_series_df.xlsx') scientists.to_excel('../output/scientists_df.xlsx', sheet_name='scientists', index=False) 执行结果: $ python3 out.py 0 Rosaline Franklin 1 William Gosset 2 Florence Nightingale 3 Marie Curie 4 Rachel Carson 5 John Snow 6 Alan Turing 7 Johann Gauss Name: Name, dtype: object Name Born Died Age Occupation 0 Rosaline Franklin 1920-07-25 1958-04-16 37 Chemist 1 William Gosset 1876-06-13 1937-10-16 61 Statistician 2 Florence Nightingale 1820-05-12 1910-08-13 90 Nurse 3 Marie Curie 1867-11-07 1934-07-04 66 Chemist 4 Rachel Carson 1907-05-27 1964-04-14 56 Biologist 5 John Snow 1813-03-15 1858-06-16 45 Physician 6 Alan Turing 1912-06-23 1954-06-07 41 Computer Scientist 7 Johann Gauss 1777-04-30 1855-02-23 77 Mathematician 注意:序列一般是直接输出成excel文件 更多的输入输出方法: 方式 描述 to_clipboard 将数据保存到系统剪贴板进行粘贴 to_dense 将数据转换为常规“密集”DataFrame to_dict 将数据转换为Python字典 to_gbq 将数据转换为Google BigQuery表格 to_hdf 将数据保存为分层数据格式(HDF) to_msgpack 将数据保存到可移植的类似JSON的二进制文件中 to_html 将数据转换为HTML表格 to_json 将数据转换为JSON字符串 to_latex 将数据转换为LaTeX tabular环境 to_records 将数据转换为记录数组 to_string 将DataFrame显示为stdout的字符串 to_sparse 将数据转换为SparseDataFrame to_sql 将数据保存到SQL数据库中 to_stata 将数据转换为Stata dta文件 读CSV文件 read_csv.py #!/usr/bin/python3 # -*- coding: utf-8 -*- # 
Author: china-testing#126.com wechat:pythontesting QQ群:630011153 # CreateDate: 2018-3-9 # read_csv.py import pandas as pd df = pd.read_csv("1.csv", header=None) # 不读取列名 print("df:") print(df) print("df.head():") print(df.head()) # head(self, n=5),默认为5行,类似的有tail print("df.tail():") print(df.tail()) df = pd.read_csv("1.csv") # 默认读取列名 print("df:") print(df) df = pd.read_csv("1.csv", names=['号码','群号']) # 自定义列名 print("df:") print(df) # 自定义列名,去掉第一行 df = pd.read_csv("1.csv", skiprows=[0], names=['号码','群号']) print("df:") print(df) 执行结果: df: 0 1 0 qq qqgroup 1 37391319 144081101 2 37391320 144081102 3 37391321 144081103 4 37391322 144081104 5 37391323 144081105 6 37391324 144081106 7 37391325 144081107 8 37391326 144081108 9 37391327 144081109 10 37391328 144081110 11 37391329 144081111 12 37391330 144081112 13 37391331 144081113 14 37391332 144081114 15 37391333 144081115 df.head(): 0 1 0 qq qqgroup 1 37391319 144081101 2 37391320 144081102 3 37391321 144081103 4 37391322 144081104 df.tail(): 0 1 11 37391329 144081111 12 37391330 144081112 13 37391331 144081113 14 37391332 144081114 15 37391333 144081115 df: qq qqgroup 0 37391319 144081101 1 37391320 144081102 2 37391321 144081103 3 37391322 144081104 4 37391323 144081105 5 37391324 144081106 6 37391325 144081107 7 37391326 144081108 8 37391327 144081109 9 37391328 144081110 10 37391329 144081111 11 37391330 144081112 12 37391331 144081113 13 37391332 144081114 14 37391333 144081115 df: 号码 群号 0 qq qqgroup 1 37391319 144081101 2 37391320 144081102 3 37391321 144081103 4 37391322 144081104 5 37391323 144081105 6 37391324 144081106 7 37391325 144081107 8 37391326 144081108 9 37391327 144081109 10 37391328 144081110 11 37391329 144081111 12 37391330 144081112 13 37391331 144081113 14 37391332 144081114 15 37391333 144081115 df: 号码 群号 0 37391319 144081101 1 37391320 144081102 2 37391321 144081103 3 37391322 144081104 4 37391323 144081105 5 37391324 144081106 6 37391325 144081107 7 37391326 144081108 8 37391327 144081109 9 
37391328 144081110 10 37391329 144081111 11 37391330 144081112 12 37391331 144081113 13 37391332 144081114 14 37391333 144081115 写CSV文件 #!/usr/bin/python3 # -*- coding: utf-8 -*- # write_csv.py import pandas as pd data ={'qq': [37391319,37391320], 'group':[1,2]} df = pd.DataFrame(data=data, columns=['qq','group']) df.to_csv('2.csv',index=False) 读写excel和csv类似,不过要改用read_excel来读,excel_summary_demo, 提供了多个excel求和的功能,可以做为excel读写的实例,这里不再赘述。 参考资料 技术支持qq群144081101 591302926 567351477 钉钉免费群21745728 本文最新版本地址 本文涉及的python测试开发库 谢谢点赞! 本文相关海量书籍下载 源码下载 本文英文版书籍下载

资源下载

更多资源
Mario

Mario

马里奥是站在游戏界顶峰的超人气多面角色。马里奥靠吃蘑菇成长,特征是大鼻子、头戴帽子、身穿背带裤,还留着胡子。与他的双胞胎兄弟路易基一起,长年担任任天堂的招牌角色。

腾讯云软件源

腾讯云软件源

为解决软件依赖安装时官方源访问速度慢的问题,腾讯云为一些软件搭建了缓存服务。您可以通过使用腾讯云软件源站来提升依赖包的安装速度。为了方便用户自由搭建服务架构,目前腾讯云软件源站支持公网访问和内网访问。

Spring

Spring

Spring框架(Spring Framework)是由Rod Johnson于2002年提出的开源Java企业级应用框架,旨在通过使用JavaBean替代传统EJB实现方式降低企业级编程开发的复杂性。该框架基于简单性、可测试性和松耦合性设计理念,提供核心容器、应用上下文、数据访问集成等模块,支持整合Hibernate、Struts等第三方框架,其适用范围不仅限于服务器端开发,绝大多数Java应用均可从中受益。

Rocky Linux

Rocky Linux

Rocky Linux(中文名:洛基)是由Gregory Kurtzer于2020年12月发起的企业级Linux发行版,作为CentOS稳定版停止维护后与RHEL(Red Hat Enterprise Linux)完全兼容的开源替代方案,由社区拥有并管理,支持x86_64、aarch64等架构。其通过重新编译RHEL源代码提供长期稳定性,采用模块化包装和SELinux安全架构,默认包含GNOME桌面环境及XFS文件系统,支持十年生命周期更新。

用户登录
用户注册