有關爬蟲

簡單範例:

爬完後存到檔案

const http = require("http");
const fs = require("fs");
const iconv = require("iconv-lite");
const querystring = require("querystring");
const cheerio = require("cheerio");
const filepath = "./data.csv";

// Page number of the most recently issued request; parseDom increments it
// before asking for the next page, so it must stay reassignable.
let reqPageNum = 0;
// Highest page number that will be requested. It is never reassigned, so it
// is declared const.
const totalPageCount = 18;

/**
 * POSTs a form request for one result page and hands the decoded HTML to
 * parseDom once the whole response body has arrived.
 *
 * Request errors are logged to the console; nothing is returned.
 *
 * @param {number} pageNum - Page number sent in the PG2 and PgNo form fields.
 */
function sendRequest(pageNum) {
  const postData = querystring.stringify({
    PG2: ` ${pageNum} `,
    PgNo: pageNum,
    s: 0,
  });

  const headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "Content-Length": Buffer.byteLength(postData),
  };
  const options = {
    hostname: "lotto.bestshop.com.tw",
    path: "/649/where.asp",
    method: "POST",
    headers,
  };

  // Chunks are kept as Buffers in an array and only joined once the
  // response has ended, so the body is decoded in a single pass.
  const chunks = [];
  const collectChunk = (chunk) => {
    chunks.push(chunk);
  };
  const handleEnd = () => {
    // The response body is decoded as Big5 before it is parsed.
    const html = iconv.decode(Buffer.concat(chunks), "big5");
    parseDom(html);
  };
  const handleResponse = (res) => {
    res.on("data", collectChunk);
    res.on("end", handleEnd);
  };
  const handleError = (e) => {
    console.error(`problem with request: ${e.message}`);
  };

  const req = http.request(options, handleResponse);
  req.on("error", handleError);
  req.write(postData);
  req.end();
}

/**
 * Extracts the text of every `.TDLine1` element from one page of HTML,
 * appends it to the CSV file as rows of eight comma-separated values, and
 * then requests the next page while reqPageNum is below totalPageCount.
 *
 * Every row is written with a leading "\r\n" (including the first row of a
 * page). A page with no matching elements writes nothing.
 *
 * @param {string} dom - Decoded HTML of one result page.
 */
function parseDom(dom) {
  const COLUMNS_PER_ROW = 8;

  // Declared locally: assigning to an undeclared `$` creates an implicit
  // global and throws a ReferenceError in strict mode / ES modules.
  const $ = cheerio.load(dom);

  const cells = [];
  $(".TDLine1").each((i, ele) => {
    cells.push($(ele).text());
  });

  // Build the page's output in memory and append it in one call instead of
  // issuing a synchronous write for every cell and row break.
  let output = "";
  for (let start = 0; start < cells.length; start += COLUMNS_PER_ROW) {
    output += `\r\n${cells.slice(start, start + COLUMNS_PER_ROW).join(",")}`;
  }
  if (output !== "") {
    fs.appendFileSync(filepath, output);
  }

  // Keep crawling until the last page has been requested.
  if (reqPageNum < totalPageCount) {
    reqPageNum += 1;
    sendRequest(reqPageNum);
  }
}

// Create the empty output file before crawling starts. The "wx" flag makes
// creation fail if the file already exists, so existing data is never
// overwritten. Creating it synchronously means that failure is thrown here,
// before any request is sent, rather than from a callback after the crawl
// has already begun.
fs.writeFileSync(filepath, "", { flag: "wx" });
sendRequest(reqPageNum);

Last updated