)
)
})
return this.extractData
}
三、定时任务
cron 每天跑一跑
function job () {
let cronJob = new cron.CronJob({
cronTime: cronConfig.cronTime,
onTick: () => {
spider()
},
start: false
})
cronJob.start()
}四、数据持久化
数据持久化理论上应该不属于爬虫关心的范围，这里用 mongoose 创建 Model。
import mongoose from 'mongoose'
const Schema = mongoose.Schema

// Shape of one document stored in the `news` collection.
const NewsSchema = new Schema(
  {
    // Required text fields. The `String` constructor is used (rather than the
    // string name 'String') so every field declares its type the same way.
    title: { type: String, required: true },
    url: { type: String, required: true },
    summary: String,
    recommend: { type: Boolean, default: false },
    // NOTE(review): `source` and `status` are numeric codes defaulting to 0;
    // their meanings are not defined in this snippet; confirm against the
    // project's constants.
    source: { type: Number, required: true, default: 0 },
    status: { type: Number, required: true, default: 0 },
    // `Date.now` is passed as a function (not called here), so the default is
    // computed when a document is created rather than when the schema loads.
    createdTime: { type: Date, default: Date.now }
  },
  {
    // Explicit collection name instead of one derived from the model name.
    collection: 'news'
  }
)
export default mongoose.model('news', NewsSchema)
基本操作
import { OBJ_STATUS } from '../../Constants'
/**
 * Generic persistence helper bound to a single model.
 */
class BaseService {
  /**
   * @param {Function} ObjModel - Model constructor; its instances must
   *   expose a `save()` method that returns a promise.
   */
  constructor (ObjModel) {
    this.ObjModel = ObjModel
  }

  /**
   * Builds a model instance from `objData` and saves it.
   *
   * @param {Object} objData - Data handed to the model constructor.
   * @returns {Promise<*>} Resolves with whatever `save()` resolves to;
   *   rejects if constructing the instance or saving it fails.
   */
  async saveObject (objData) {
    // `new` is explicit so class-based models work too, and the promise
    // returned by `save()` is used directly instead of a callback wrapped
    // in a hand-built Promise.
    const doc = new this.ObjModel(objData)
    return doc.save()
  }
}
export default BaseService
资讯
import BaseService from './BaseService'
import News from '../models/News'
/**
 * Service for news records. Declares no members of its own; everything it
 * offers is inherited from BaseService.
 */
class NewsService extends BaseService {}
export default new NewsService(News)
愉快地保存数据
await newsService.batchSave(newsListTem)
更多内容到 GitHub 把项目 clone 下来看就好了。
总结









