文章内容

2017/8/20 16:57:02,作 者: 黄兵

python写入csv文件中文乱码解决方案

今天修改程序时需要写入 csv 文件,发现写入的中文会乱码。后来查了一下,只要在写入内容之前先向文件开头写入 UTF-8 BOM,就可以解决:

# -*- coding: utf-8 -*-

import csv
import pyodbc
import lxml.html
from link_crawler import link_crawler
from datetime import datetime
import sys
import codecs

# Python 2 only: ``reload(sys)`` makes ``sys.setdefaultencoding`` reachable
# again so the codec used for implicit str/unicode conversions can be switched
# to UTF-8.  Python 3 has no builtin ``reload`` and no ``setdefaultencoding``,
# so the hack is skipped there instead of failing with NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')
class ScrapeCallback:
    """Crawler callback that stores each page's title and body in a UTF-8 CSV.

    The output file is created once, when the callback is constructed: it
    starts with a UTF-8 byte-order mark followed by the header row.  Every
    later call appends the rows scraped from one page, so rows from earlier
    pages are kept instead of being overwritten.

    Attributes:
        path: Location of the CSV file being written.
        fields: Column names written as the header row.
    """

    def __init__(self, path='countries.csv'):
        """Create (or truncate) the CSV file and write the BOM and header.

        Args:
            path: Output file name; defaults to ``countries.csv``.
        """
        self.path = path
        self.fields = ('title', 'context')
        # The BOM must be the very first bytes of the file, so it is written
        # in binary mode before any CSV data.  Writing it is what keeps the
        # Chinese text from showing up garbled when the CSV is opened.
        with open(self.path, 'wb') as f:
            f.write(codecs.BOM_UTF8)
        self._append_rows([self.fields])

    def _append_rows(self, rows):
        """Append ``rows`` (an iterable of cell sequences) to the CSV file.

        The file is opened and closed on every call so no handle is leaked.
        """
        if sys.version_info[0] >= 3:
            # Python 3: csv needs a text file opened with newline=''.
            f = open(self.path, 'a', encoding='utf-8', newline='')
        else:
            # Python 2: csv needs a binary file and byte strings, so encode
            # unicode cells explicitly rather than relying on the default
            # codec.
            text_type = type(u'')
            rows = [[cell.encode('utf-8') if isinstance(cell, text_type) else cell
                     for cell in row]
                    for row in rows]
            f = open(self.path, 'ab')
        with f:
            csv.writer(f).writerows(rows)

    def __call__(self, url, html):
        """Extract title/body pairs from ``html`` and append them to the CSV.

        Args:
            url: Address of the downloaded page (not used here).
            html: Page markup, parsed with ``lxml.html.fromstring``.
        """
        tree = lxml.html.fromstring(html)
        rows = []
        for essay in tree.cssselect('#essay_xb_C'):
            titles = essay.cssselect('.essay_xb_tle > h2')
            bodies = essay.cssselect('.text_C')
            # Skip blocks lacking a title or a body instead of raising
            # IndexError on the [0] lookup.
            if not titles or not bodies:
                continue
            rows.append([titles[0].text_content(), bodies[0].text_content()])
        print(rows)
        if rows:
            self._append_rows(rows)

if __name__ == '__main__':
    # Script entry point: run the crawler against the site and pass a
    # ScrapeCallback instance as its scrape callback.  The second argument is
    # presumably the link pattern to follow -- see link_crawler to confirm.
    callback = ScrapeCallback()
    link_crawler(
        'http://www.mylikesz.com/',
        '/(html|view)',
        scrape_callback=callback,
    )

最为关键的一句就是:f.write(codecs.BOM_UTF8)。它在文件开头写入 UTF-8 BOM,有了这一句,中文就能正常写入(例如用 Excel 打开时不再乱码)。

参考资料:python写入csv文件中文乱码解决方案

分享到:

发表评论

评论列表