Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# __author__: "gcy"
"""Summarise the contacts stored in an .xls export.

Input: one .xls file (``excelFilePath``).
Output: a one-line summary giving the number of contacts with a public
e-mail address, how many of them are reporters (and how many of those work
for a TOP12 / NEXT30 outlet), and the remainder reported as experts / KOLs.
"""
import os

import pandas as pd

excelFilePath = r"F:\test.xls"

# TOP12 / NEXT30 outlet names matched against the "机构" column.
# NOTE(review): "Media Title" makes this a 13-entry list and looks like a
# table header that was pasted in by accident -- confirm before removing.
top12 = ["AFP", "AP", "BBC", "Bloomberg", "CNBC", "CNN", "Forbes", "Fortune", "FT", "Media Title", "New York Times",
         "Reuters", "Wall Street Journal"]
# NOTE(review): "Re/Code, The Verge" is a single entry here; if the sheet
# lists the two outlets separately it will never match -- confirm.
next30 = ["ANSA", "Huffington Post", "Sky News", "Business Insider", "Le Monde", "SF Chronicle", "Cheddar", "Mashable",
          "The Guardian", "Daily Mail", "Mid-East Eco Review", "The Independent", "Der Spiegel", "Nikkei Asian Review",
          "The Economist", "DPA", "Press Trust of India", "The Information", "EFE", "Quartz", "Times of India",
          "Fast Company", "Re/Code, The Verge", "USA TODAY", "Gulf News", "RFI", "Washington Post",
          "Harvard Business Review", "SCMP", "Wired"]


def count_reporters(df):
    """Count reporters among the contacts that have an e-mail address.

    Args:
        df: DataFrame with the columns "电子邮件", "所在国家", "类别" and "机构".

    Returns:
        A tuple ``(email_total, reporter_count, top12_count, next30_count)``.
        ``email_total`` is the number of rows whose "电子邮件" cell is not
        empty; the other three counts only consider those rows.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Only rows that actually have an e-mail take part. Looping over
    # range(count-of-non-null-e-mails) would instead scan the first N rows,
    # which are the wrong rows as soon as any e-mail cell is blank.
    rows = df[df["电子邮件"].notna()]
    top12_names = set(top12)
    next30_names = set(next30)

    reporter_count = 0
    top12_count = 0
    next30_count = 0
    for country, category, org in zip(rows["所在国家"], rows["类别"], rows["机构"]):
        # Open question kept from the original author: is a contact a
        # reporter when the category merely contains "记者", or only when it
        # is exactly "记者"? The current rule is "contains".
        # Contacts whose country is "德国" are not counted as reporters.
        if country == "德国" or "记者" not in str(category):
            continue
        reporter_count += 1
        # NOTE(review): the source had lost its indentation; TOP12 / NEXT30
        # are counted among reporters only, which is what the summary
        # wording ("其中记者…(其中TOP12…") implies -- confirm.
        # Stray whitespace around a cell value must not defeat the lookup.
        org_name = org.strip() if isinstance(org, str) else org
        if org_name in top12_names:
            top12_count += 1
        if org_name in next30_names:
            next30_count += 1
    return len(rows), reporter_count, top12_count, next30_count


def build_message(email_total, reporter_count, top12_count, next30_count):
    """Format the summary line; experts/KOLs are ``email_total - reporter_count``."""
    return (
        f"公开邮箱人数:{email_total}人,其中记者{reporter_count}人"
        f"(其中TOP12 {top12_count}人,NEXT30 {next30_count}人);"
        f"专家和KOL {email_total - reporter_count}人;"
    )


def main():
    """Read ``excelFilePath`` and print the e-mail count and the summary line."""
    # read_excel already returns a DataFrame; no extra wrapping is needed.
    df = pd.read_excel(excelFilePath)
    # Keep only the fields needed for the export below.
    dfNeed = df[['姓名', '性别', '所在国家', '职务', '机构', '简介', '领域', '类别', '电子邮件']]
    # splitext only drops the final extension, so dots elsewhere in the path
    # do not truncate the output name.
    needFieldExcelPath = os.path.splitext(excelFilePath)[0] + "_needField.xlsx"
    # TODO: export with openpyxl, auto-fitting cell sizes and applying fills.
    # dfNeed.to_excel(needFieldExcelPath, index=False)

    print(df["电子邮件"].count())
    print(build_message(*count_reporters(df)))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement