process_item(self, item, spider): 函数并传入item,spider等参数 在这里可以将数据进行持久化储存 我的piple代码
# -*- coding: utf-8 -*-# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.htmlimport MySQLdb
import MySQLdb.cursorsfrom twisted.enterprise import adbapiclass MyPipeline(object): ##这里的函数名于setting中的对应
def __init__(self, dbpool):
self.dbpool = dbpool
@classmethod ##得到数据库的连接
def from_settings(cls, settings):
dbargs = dict(
host = settings['MYSQL_HOST'],
db = settings['MYSQL_DBNAME'],
port = settings['MYSQL_PORT'],
user = settings['MYSQL_USER'],
passwd = settings['MYSQL_PASSWD'],
charset = 'utf8',
cursorclass = MySQLdb.cursors.DictCursor,
use_unicode = True,
)
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)return cls(dbpool)
def process_item(self, item, spider): ##这个函数会在spider返回时调用
d = self.dbpool.runInteraction(self._do_upinsert, item, spider)return item
def _do_upinsert(self, conn, item, spider):
valid = Truefor data in item:if not data:
valid = Falseif valid: ##执行sql
result = conn.execute(‘sql’) if result:
print 'added a record'else:
print 'failed insert into table'
查看更多关于分享pipeline存储的实例代码的详细内容...
声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://www.haodehen.cn/did84945