Advertisement
Guest User

Untitled

a guest
Jul 20th, 2019
291
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.30 KB | None | 0 0
  #! /usr/bin/env python3
# -*- coding: utf-8 -*-
# __author__: "gcy"
"""Summarise reporter statistics from a contacts spreadsheet.

Input:  one .xls file (see ``excelFilePath``).
Output: a one-line summary with the number of contacts that have a public
        e-mail address, how many of them are reporters, how many of those
        reporters work for a TOP12 / NEXT30 outlet, and the remainder
        (reported as experts and KOLs).
"""

import os.path
from typing import Iterable, NamedTuple, Optional

import pandas as pd

excelFilePath = r"F:\test.xls"

# Outlet lists used to classify a reporter's organisation.
# NOTE(review): the list is called top12 but holds 13 entries; "Media Title"
# looks like a column header that was pasted in by accident -- confirm before
# removing it.
top12 = ["AFP", "AP", "BBC", "Bloomberg", "CNBC", "CNN", "Forbes", "Fortune", "FT", "Media Title", "New York Times",
         "Reuters", "Wall Street Journal"]

# NOTE(review): "Nikkei Asian Review " carries a trailing space; matching below
# strips whitespace on both sides so the entry still matches a clean cell.
next30 = ["ANSA", "Huffington Post", "Sky News", "Business Insider", "Le Monde", "SF Chronicle", "Cheddar", "Mashable",
          "The Guardian", "Daily Mail", "Mid-East Eco Review", "The Independent", "Der Spiegel", "Nikkei Asian Review ",
          "The Economist", "DPA", "Press Trust of India", "The Information", "EFE", "Quartz", "Times of India",
          "Fast Company", "Re/Code, The Verge", "USA TODAY", "Gulf News", "RFI", "Washington Post",
          "Harvard Business Review", "SCMP", "Wired"]

# Columns kept for the (not yet implemented) trimmed export.
needFields = ['姓名', '性别', '所在国家', '职务', '机构', '简介', '领域', '类别', '电子邮件']


class ReporterStats(NamedTuple):
    """Counts produced by :func:`count_reporters`."""

    with_email: int    # rows whose e-mail cell is not empty
    reporters: int     # of those, rows classified as reporters
    top12_count: int   # reporters whose organisation is in the TOP12 list
    next30_count: int  # reporters whose organisation is in the NEXT30 list


def need_field_path(path: str) -> str:
    """Return the output path for the trimmed export of *path*.

    Only the final extension is replaced, so a dot elsewhere in the path
    (for example in a directory name) no longer truncates the result.
    """
    return os.path.splitext(path)[0] + "_needField.xlsx"


def _clean(value):
    """Strip surrounding whitespace from strings; return other values as-is."""
    return value.strip() if isinstance(value, str) else value


def count_reporters(df: pd.DataFrame,
                    top_outlets: Optional[Iterable[str]] = None,
                    next_outlets: Optional[Iterable[str]] = None) -> ReporterStats:
    """Count reporters among the rows of *df* that have an e-mail address.

    A row is a reporter when its country is not "德国" and the text "记者"
    occurs anywhere in its category cell.
    NOTE(review): the original author left open whether a reporter is a row
    whose category *contains* "记者" or one whose category is *only* "记者";
    the "contains" rule is kept here.
    NOTE(review): reporters located in Germany are excluded from the reporter
    count and therefore end up in the experts/KOL remainder -- confirm that
    this is intended.

    Args:
        df: table with at least the columns 电子邮件, 所在国家, 类别 and 机构.
        top_outlets: organisation names of the first tier; defaults to the
            module-level ``top12`` list.
        next_outlets: organisation names of the second tier; defaults to the
            module-level ``next30`` list.

    Returns:
        A :class:`ReporterStats` tuple.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    top_set = {_clean(name) for name in (top12 if top_outlets is None else top_outlets)}
    next_set = {_clean(name) for name in (next30 if next_outlets is None else next_outlets)}

    # Select the rows that actually have an e-mail. The previous code used the
    # non-null count as a positional bound, which inspected the first N rows
    # instead of the rows with an e-mail whenever any cell was blank.
    rows = df[df["电子邮件"].notna()]

    reporters = 0
    top_hits = 0
    next_hits = 0
    for country, category, org in zip(rows["所在国家"], rows["类别"], rows["机构"]):
        if country == "德国" or "记者" not in str(category):
            continue
        reporters += 1
        name = _clean(org)
        # Two independent checks: tier counts apply to reporters only.
        if name in top_set:
            top_hits += 1
        if name in next_set:
            next_hits += 1

    return ReporterStats(len(rows), reporters, top_hits, next_hits)


def build_summary(stats: ReporterStats) -> str:
    """Format *stats* as the one-line summary printed by the script."""
    others = stats.with_email - stats.reporters
    return (f"公开邮箱人数:{stats.with_email}人,其中记者{stats.reporters}人"
            f"(其中TOP12 {stats.top12_count}人,NEXT30 {stats.next30_count}人);"
            f"专家和KOL {others}人;")


def main() -> None:
    """Read the spreadsheet, compute the statistics and print the summary."""
    # read_excel already returns a DataFrame for a single sheet.
    df = pd.read_excel(excelFilePath)

    # Keep only the needed fields; this also fails fast on a missing column.
    dfNeed = df[needFields]

    # TODO: export dfNeed to needFieldExcelPath via openpyxl with auto-sized,
    # filled cells (dfNeed.to_excel(needFieldExcelPath, index=False)).
    needFieldExcelPath = need_field_path(excelFilePath)

    stats = count_reporters(df)
    print(stats.with_email)
    print(build_summary(stats))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement