Python第三次作业 2019-03-29

作者: 愉快先生 | 来源:发表于2019-04-24 15:25 被阅读0次

Python第三次作业 2019-03-29
day5
python（—）了解、和简单的数据类型转换
Python脚本一件打包
pyhton实战作业2_2
python 基础 - 开发环境搭建
python2和python3版本共存后pip使用方法
赵浚皓思维导图
第三次作业
python导出excel，csv

'''
1. 统计文档 aboutUN.txt 的词频，并将结果写入 wordsFrequence.text。
   涉及分词（用 split()）和统计每个词出现的次数，
   结果可以用字典或集合保存。
2. 从一个文本文件中获取目前联合国 193 个会员国的名字，并将其写入 UNmemebers.txt 中。
   有两种做法：一种是正则表达式，另一种是使用 BeautifulSoup（“美味汤”）、xpath、etree 等工具。

第一个题：

import requests, sys
import numpy as np
import pandas as pd
from lxml import etree
import  pickle
class UNmember:
    """Namespace-style holder for the data scraped from the UN pages.

    Every attribute is a class-level container, so it is shared by all
    users of the class; the script below reads and fills these directly
    on the class rather than on instances.

    Author's note (translated): not yet sure how object-oriented design
    should be applied here.
    """

    # Names of the countries/regions read from the UN data index page.
    country_And_region232 = list()
    # Per-country attribute columns (one entry per country page).
    date = list()
    location = list()
    population = list()
    area = list()
    # Placeholder for the final collection of UN member rows.
    UNmemberset = tuple()



def select_member_rows(member_names, rows):
    """Return the rows whose first element is one of *member_names*.

    Rows are emitted in the order of *member_names*. When several rows share
    the same name they keep their relative order from *rows*; a name that
    occurs more than once in *member_names* yields its rows once per
    occurrence. Names without a matching row are skipped.

    Args:
        member_names: Iterable of country names to keep.
        rows: Iterable of sequences whose first item is a country name.

    Returns:
        A list with the matching rows.
    """
    # Index the rows by name once instead of rescanning them for every name.
    rows_by_name = {}
    for row in rows:
        rows_by_name.setdefault(row[0], []).append(row)

    selected = []
    for name in member_names:
        selected.extend(rows_by_name.get(name, ()))
    return selected


if __name__ == "__main__":
    # Parse the UN data index page; elements and text are located with XPath.
    html = etree.parse('http://data.un.org/en/index.html', etree.HTMLParser())

    # Country/region names from the index page (232 expected by the author).
    UNmember.country_And_region232 = html.xpath('//ul/li//td[4]/text()')
    print(UNmember.country_And_region232)
    print(len(UNmember.country_And_region232))

    # Locally saved HTML file handed out with the assignment; it lists the
    # member states (193 expected by the author).
    html0 = etree.parse('file:///C:/Users/goals2020/Desktop/UN.html',
                        etree.HTMLParser())
    result_UNcountry193 = html0.xpath(
        '//div[@class="view-content"]//span[@class="member-state-name"]/text()')
    print(result_UNcountry193)
    print(len(result_UNcountry193))

    # Follow every link on the index page and read attributes from the
    # linked page.
    target = 'http://data.un.org/en/'
    information = html.xpath('//ul/li/a')
    for link in information:
        html_information = etree.parse(target + link.get('href'),
                                       etree.HTMLParser())
        # Third cell of the first table row is stored as the location.
        # xpath() returns a list, so each stored entry is itself a list.
        # NOTE: the author sketched the same lookup for tr[2] (date),
        # tr[3] (population) and tr[4] (area); those are not collected yet.
        result_location = html_information.xpath(
            '//ul//tbody/tr[1]/td[3]/text()')
        UNmember.location.append(result_location)
        print(result_location)

    # Pair every name with its location so the data can be handled row-wise.
    # NOTE(review): this assumes the names and the links come back in the
    # same order and count; zip() silently truncates to the shorter one.
    result_list = list(zip(UNmember.country_And_region232, UNmember.location))

    # Keep only the rows whose name is also in the 193-member list.
    l7 = select_member_rows(result_UNcountry193, result_list)

    # l7 is the final result.
    print(l7)
    print(len(l7))

    # TODO: persist l7 to a file (the author sketched pickle.dump/pickle.load).
    # TODO: rows mix lists and single values; unify the format before
    # writing the result to a file.