From 198ba79187c1da4f675a9e86e02c08db47d3fddc Mon Sep 17 00:00:00 2001 From: rsgltzyd Date: Sun, 28 Jul 2024 18:45:34 +0800 Subject: [PATCH 1/7] fix --- login.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/login.js b/login.js index f7844ce..42254c0 100644 --- a/login.js +++ b/login.js @@ -3,7 +3,7 @@ import fs from 'fs'; import 'dotenv/config.js'; (async () => { - const browser = await launch({ headless: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); + const browser = await launch({ headless: false, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); const page = await browser.newPage(); await page.goto('https://dev-dmp.meiguanjia.net/login'); await page.locator('#userName input').fill(process.env.ACCOUNT); @@ -19,6 +19,10 @@ import 'dotenv/config.js'; const importStorage = JSON.stringify(localStorage); const file = './data/user.json'; + if (!fs.existsSync('./data/')) { + fs.mkdirSync('./data/'); + console.log('文件夹不存在,创建文件夹'); + } fs.writeFileSync(file, importStorage, 'utf-8'); await browser.close(); })(); \ No newline at end of file From f49aa6531d5d78475c64fde109648299b6798dc4 Mon Sep 17 00:00:00 2001 From: rsgltzyd Date: Mon, 29 Jul 2024 22:17:43 +0800 Subject: [PATCH 2/7] fix --- BusinessDataAnalysisSheet.js | 24 ---------------------- data/zhihu.json | 17 +++++++++++++++ example.js | 2 +- example2.js | 40 ------------------------------------ login.js | 28 ------------------------- uselogin.js | 32 +++++++++++++++++++++++------ utils/userUtils.js | 2 ++ 7 files changed, 46 insertions(+), 99 deletions(-) delete mode 100644 BusinessDataAnalysisSheet.js create mode 100644 data/zhihu.json delete mode 100644 example2.js delete mode 100644 login.js diff --git a/BusinessDataAnalysisSheet.js b/BusinessDataAnalysisSheet.js deleted file mode 100644 index ceb5df0..0000000 --- a/BusinessDataAnalysisSheet.js +++ /dev/null @@ -1,24 +0,0 @@ -import { launch } from 'puppeteer'; -import 'dotenv/config.js' - -(async () => { - const browser = await launch({ headless: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); - const page = await browser.newPage(); - await page.setViewport({ width: 1920, height: 1080 }); - await page.goto('https://dev-dmp.meiguanjia.net/report/businessData'); - - console.log(current); - console.log(process.env.ACCOUNT); - - const item = await page.$eval('.item.cur', node=>node.innerHTML); - console.log(item); - // const arcoMessage = await page.$eval('.arco-message .arco-message-content', node => node.innerHTML); - // await page.screenshot({ path: 'resetPassword.png' }); - console.log(arcoMessage); - - page.on('console', msg => { - console.log('PAGE:', msg.text()) - }) - - await browser.close(); -})(); \ No newline at end of file diff --git a/data/zhihu.json b/data/zhihu.json new file mode 100644 index 0000000..754506b --- /dev/null +++ b/data/zhihu.json @@ -0,0 +1,17 @@ +{ + "_xsrf": "DbVKUPkNSllVquHSGYYzhSxVfoN7Jv3y", + "_zap": "a93b33b3-5012-4fd9-896f-255399bab405", + "d_c0": "AEASegND_hiPTiUjKXU9EemEsTqgkQY453I", + "q_c1": "80150a2bf62e448399ddc5a9ad2fc96f|1722160079000|1722160079000", + "__zse_ck": "001_7B93TpL6TWwuu2z/poOWqUavUfxDbgsj77T8XUma+QyB8h0eUaQadkTFioz5mB5mtErIJbefbPdYH", + "Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49": "1720611937,1722164301,1722254161,1722258218", + "HMACCOUNT": "F0E98F57775BDBB4", + "__snaker__id": "USsgt4pZsujP2PgG", + "gdxidpyhxdE": "knS%2B%2BOKbPWfLNSgDUJMmJY7JxxGDy0jcaOcD5ep1L6V6esNdpEEeWqhNc16agv%2BwcyPuhp4CPnzejgUSAMGm5yC1gZHkDt4atQdjYAjJZVUx%5CRlUrRzPxWiE3BlG3t%2Fs7xAJ%5C2TW1OR7oiM%2F4PvwKwiI4oj9P6ALxKaDrTRyPZhNHeiT%3A1722259465069", + "SESSIONID": "jbNinEEfFAY9yMN2vgpUPblqsLKGEm5IYKl1Lt2ripF", + "JOID": "V10WB0Jst9Ts_0beWmxLSndHLzlKBtOdiJ0DnTsm9ovSiweQbk0heYvzQN9QEPqH8CAwbxbw5C7iCfEwV9Kj-2o", + "osd": "VlkTAktts9Hp9kfaX2lCS3NCKjBLAtaYgZwHmD4v94_Xjg6RakgkcIr3RdpZEf6C9SkxaxP17S_mDPQ5Vtam_mM", + "tst": "h", + "Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49": "1722259680", + "BEC": "32377ec81629ec05d48c98f32428ae46" +} \ No newline at end of file diff --git a/example.js b/example.js index ab1a0f2..753d4bb 100644 --- a/example.js +++ b/example.js @@ -32,7 +32,7 @@ import 'dotenv/config.js' page.on('console', msg => { console.log('PAGE:', msg.text()) - }) + }) await browser.close(); })(); \ No newline at end of file diff --git a/example2.js b/example2.js deleted file mode 100644 index 7ac1317..0000000 --- a/example2.js +++ /dev/null @@ -1,40 +0,0 @@ -import { launch } from 'puppeteer'; -import fs from 'fs'; -import 'dotenv/config.js' -import { setLocalStorage } from './utils/userUtils.js'; -import { getDate, cleanType } from './utils/businessDataUtil.js'; - -(async () => { - const browser = await launch({ headless: false, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); - const page = await browser.newPage(); - setLocalStorage(page, '.\\data\\user.json'); - await page.goto('https://dev-dmp.meiguanjia.net/report/businessData'); - - await page.waitForSelector('.menu-list-second'); - const childElements = await page.$$('.menu-list-second .menu-list-second-item'); - await new Promise(r => setTimeout(r, 1000)); - await childElements[1].click(); - const str = await page.evaluate(node => node.textContent.trim(), childElements[1]); - console.log(str); - - const file = './data/businessData.json'; - - fs.writeFileSync(file, '', 'utf-8'); - - await getDate(page, fs, file); - console.log('========'); - - await cleanType(page, '.arco-picker-start-time', '2024-06'); - await getDate(page, fs, file); - console.log('========'); - - await cleanType(page, '.arco-picker-start-time', '2024-05'); - await getDate(page, fs, file); - console.log('========'); - - await cleanType(page, '.arco-picker-start-time', '2024-04'); - await getDate(page, fs, file); - console.log('========'); - - await browser.close(); -})(); \ No newline at end of file diff --git a/login.js b/login.js deleted file mode 100644 index 42254c0..0000000 --- a/login.js +++ /dev/null @@ -1,28 +0,0 @@ -import { launch } from 'puppeteer'; -import fs from 'fs'; -import 'dotenv/config.js'; - -(async () => { - const browser = await launch({ headless: false, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); - const page = await browser.newPage(); - await page.goto('https://dev-dmp.meiguanjia.net/login'); - await page.locator('#userName input').fill(process.env.ACCOUNT); - await page.locator('#passWord input').fill(process.env.PASSWORD); - await page.locator('[type="submit"]').click(); - - await page.waitForSelector('.arco-message .arco-message-content', { visible: true }); - const arcoMessage = await page.$eval('.arco-message .arco-message-content', node => node.innerHTML); - console.log(arcoMessage); - - await new Promise(r => setTimeout(r, 1000)); - const localStorage = await page.evaluate(() => Object.assign({}, window.localStorage)); - - const importStorage = JSON.stringify(localStorage); - const file = './data/user.json'; - if (!fs.existsSync('./data/')) { - fs.mkdirSync('./data/'); - console.log('文件夹不存在,创建文件夹'); - } - fs.writeFileSync(file, importStorage, 'utf-8'); - await browser.close(); -})(); \ No newline at end of file diff --git a/uselogin.js b/uselogin.js index e9316c7..6fb170a 100644 --- a/uselogin.js +++ b/uselogin.js @@ -7,11 +7,31 @@ import { setLocalStorage } from './utils/userUtils.js'; (async () => { const browser = await launch({ headless: false, devtools: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); const page = await browser.newPage(); - await page.goto('https://dev-dmp.meiguanjia.net/login'); - await setLocalStorage(page, 'user.json'); - - await page.goto('https://dev-dmp.meiguanjia.net/report/businessData'); - - await browser.close(); + await page.goto('https://www.zhihu.com/hot'); + const outputLocalStorge = fs.readFileSync('./data/zhihu.json', 'utf-8'); + let localStorageTest = JSON.parse(outputLocalStorge); + let tmpcookies = await page.cookies(); + console.log(tmpcookies); + await page.deleteCookie(...tmpcookies); + + const cookies = new Array(); + for (const iterator in localStorageTest) { + cookies.push({ + name: iterator, + value: localStorageTest[iterator], + domain: '.zhihu.com', + path: '/', + httpOnly: true, + secure: true + }); + console.log(localStorageTest[iterator]); + } + + console.log(cookies); + + await page.setCookie(...cookies); + + await page.goto('https://www.zhihu.com/hot'); + // await browser.close(); })(); \ No newline at end of file diff --git a/utils/userUtils.js b/utils/userUtils.js index b44783c..db9513d 100644 --- a/utils/userUtils.js +++ b/utils/userUtils.js @@ -17,4 +17,6 @@ export async function setLocalStorage(page, file) { localStorage.setItem('merchant_login_data', localStorageTest.merchant_login_data); localStorage.setItem('merchant_metadata_data', localStorageTest.merchant_metadata_data); }, localStorageTest); + + console.log('set success'); } \ No newline at end of file From 7ff9cc663fe5d717a881a263cc3e13a0424e4b80 Mon Sep 17 00:00:00 2001 From: rsgltzyd Date: Tue, 30 Jul 2024 20:58:55 +0800 Subject: [PATCH 3/7] fix --- .gitignore | 1 + data/zhihu.json | 17 ----------------- example.js | 33 +++++---------------------------- uselogin.js | 34 ++++++---------------------------- utils/businessDataUtil.js | 36 ------------------------------------ utils/userUtils.js | 17 +++++++++++++++++ 6 files changed, 29 insertions(+), 109 deletions(-) delete mode 100644 data/zhihu.json delete mode 100644 utils/businessDataUtil.js diff --git a/.gitignore b/.gitignore index ebf6dfa..d8e98cd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ node_modules/ *.png businessData.json data/user.json +data/zhihu.json diff --git a/data/zhihu.json b/data/zhihu.json deleted file mode 100644 index 754506b..0000000 --- a/data/zhihu.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "_xsrf": "DbVKUPkNSllVquHSGYYzhSxVfoN7Jv3y", - "_zap": "a93b33b3-5012-4fd9-896f-255399bab405", - "d_c0": "AEASegND_hiPTiUjKXU9EemEsTqgkQY453I", - "q_c1": "80150a2bf62e448399ddc5a9ad2fc96f|1722160079000|1722160079000", - "__zse_ck": "001_7B93TpL6TWwuu2z/poOWqUavUfxDbgsj77T8XUma+QyB8h0eUaQadkTFioz5mB5mtErIJbefbPdYH", - "Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49": "1720611937,1722164301,1722254161,1722258218", - "HMACCOUNT": "F0E98F57775BDBB4", - "__snaker__id": "USsgt4pZsujP2PgG", - "gdxidpyhxdE": "knS%2B%2BOKbPWfLNSgDUJMmJY7JxxGDy0jcaOcD5ep1L6V6esNdpEEeWqhNc16agv%2BwcyPuhp4CPnzejgUSAMGm5yC1gZHkDt4atQdjYAjJZVUx%5CRlUrRzPxWiE3BlG3t%2Fs7xAJ%5C2TW1OR7oiM%2F4PvwKwiI4oj9P6ALxKaDrTRyPZhNHeiT%3A1722259465069", - "SESSIONID": "jbNinEEfFAY9yMN2vgpUPblqsLKGEm5IYKl1Lt2ripF", - "JOID": "V10WB0Jst9Ts_0beWmxLSndHLzlKBtOdiJ0DnTsm9ovSiweQbk0heYvzQN9QEPqH8CAwbxbw5C7iCfEwV9Kj-2o", - "osd": "VlkTAktts9Hp9kfaX2lCS3NCKjBLAtaYgZwHmD4v94_Xjg6RakgkcIr3RdpZEf6C9SkxaxP17S_mDPQ5Vtam_mM", - "tst": "h", - "Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49": "1722259680", - "BEC": "32377ec81629ec05d48c98f32428ae46" -} \ No newline at end of file diff --git a/example.js b/example.js index 753d4bb..a61e733 100644 --- a/example.js +++ b/example.js @@ -1,38 +1,15 @@ import { launch } from 'puppeteer'; import 'dotenv/config.js' +import { injectionCookie } from './utils/userUtils'; (async () => { const browser = await launch({ headless: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); const page = await browser.newPage(); - await page.setViewport({ width: 1920, height: 1080 }); - const dev = 'https://dev-dmp.meiguanjia.net/login'; - const stging = 'https://boss.aizhb.net/login'; - let current = ''; - if (process.env.PROFILE === 'dev') { - current = dev; - } else if (process.env.PROFILE === 'stging') { - current = stging; - } - console.log(current); - console.log(process.env.ACCOUNT); - await page.goto(current); - await page.locator('#userName input').fill(process.env.ACCOUNT); - await page.locator('.forgot_box').click(); - await page.waitForSelector('.modify_main'); - await page.locator('#digitalCode input').fill('1'); - await page.locator('#smsCode input').fill(process.env.SMS); - await page.locator('#newPassWord input').fill(process.env.PASSWORD); - await page.locator('#confirmPassWord input').fill(process.env.PASSWORD); - await page.locator('[type="submit"]').click(); - await page.waitForSelector('.arco-message .arco-message-content', { visible: true }); - const arcoMessage = await page.$eval('.arco-message .arco-message-content', node => node.innerHTML); - await page.screenshot({ path: 'resetPassword.png' }); - console.log(arcoMessage); - - page.on('console', msg => { - console.log('PAGE:', msg.text()) - }) + // 注入cookie + const webUrl = 'https://www.zhihu.com/hot'; + const cookieFileUrl = './data/zhihu.json'; + await injectionCookie(page, webUrl, cookieFileUrl); await browser.close(); })(); \ No newline at end of file diff --git a/uselogin.js b/uselogin.js index 6fb170a..51b54d1 100644 --- a/uselogin.js +++ b/uselogin.js @@ -1,37 +1,15 @@ import { launch } from 'puppeteer'; import fs from 'fs'; import 'dotenv/config.js'; - -import { setLocalStorage } from './utils/userUtils.js'; +import { injectionCookie } from './utils/userUtils.js'; (async () => { const browser = await launch({ headless: false, devtools: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); const page = await browser.newPage(); - await page.goto('https://www.zhihu.com/hot'); - const outputLocalStorge = fs.readFileSync('./data/zhihu.json', 'utf-8'); - let localStorageTest = JSON.parse(outputLocalStorge); - let tmpcookies = await page.cookies(); - console.log(tmpcookies); - await page.deleteCookie(...tmpcookies); - - const cookies = new Array(); - for (const iterator in localStorageTest) { - cookies.push({ - name: iterator, - value: localStorageTest[iterator], - domain: '.zhihu.com', - path: '/', - httpOnly: true, - secure: true - }); - console.log(localStorageTest[iterator]); - } - - console.log(cookies); - - await page.setCookie(...cookies); - - await page.goto('https://www.zhihu.com/hot'); - // await browser.close(); + const webUrl = 'https://www.zhihu.com/hot'; + const cookieFileUrl = './data/zhihu.json'; + await injectionCookie(page, webUrl, cookieFileUrl); + + // browser.close(); })(); \ No newline at end of file diff --git a/utils/businessDataUtil.js b/utils/businessDataUtil.js deleted file mode 100644 index fa96d75..0000000 --- a/utils/businessDataUtil.js +++ /dev/null @@ -1,36 +0,0 @@ -export async function getDate(page, fs, file) { - await new Promise(r => setTimeout(r, 2000)); - await page.waitForSelector('.ai_custome'); - const aiCustome = await page.$$('.ai_custome .ai_custome_item .amount'); - let ai = new Set(); - for (const e of aiCustome) { - let str = await page.evaluate(element => element.textContent, e); - ai.add(str); - } - - let importStorage = '\n' + JSON.stringify(Array.from(ai), null, 2) + '\n'; - fs.appendFileSync(file, importStorage, 'utf-8'); - - const elementHandle = await page.waitForSelector('.bi_warp iframe'); - const frame = await elementHandle.contentFrame(); - await frame.waitForSelector('#label-content'); - const items = await frame.$$('#label-content'); // Use $$ to get all matching elements - let its = new Set(); - if (items.length > 0) { // Check if items array is not empty - for (const e of items) { - let str = await frame.evaluate(element => element.textContent, e); // Use frame.evaluate() here - its.add(str.replace(/\s+/g, ' ')); - } - } else { - console.log('#label-content not found in the iframe'); - } - - importStorage = JSON.stringify(Array.from(its), null, 2) + '\n'; - fs.appendFileSync(file, importStorage, 'utf-8'); -} - -export async function cleanType(page, element, timeContent) { - const time = await page.$(element); - await time.click({ clickCount: 3 }); - await time.type(timeContent, { delay: 100 }); -} \ No newline at end of file diff --git a/utils/userUtils.js b/utils/userUtils.js index db9513d..9351ed6 100644 --- a/utils/userUtils.js +++ b/utils/userUtils.js @@ -19,4 +19,21 @@ export async function setLocalStorage(page, file) { }, localStorageTest); console.log('set success'); +} + +/** + * @param {Page} page + * @param {String} webUrl 网站 + * @param {String} cookieFile cookie地址Url + */ +export async function injectionCookie(page, webUrl, cookieFile) { + try { + await page.goto(webUrl); + const cookies = JSON.parse(fs.readFileSync(cookieFile, 'utf-8')); + console.log(cookies); + await page.setCookie(...cookies); + await page.goto(webUrl); + } catch (error) { + console.error(error); + } } \ No newline at end of file From a8e451a8c25a03690e5144a129f7cf0930b1b0d3 Mon Sep 17 00:00:00 2001 From: rsgltzyd Date: Tue, 30 Jul 2024 21:23:35 +0800 Subject: [PATCH 4/7] add injectionCookie fix --- example.js | 15 --------------- examples/zhihu.js | 26 ++++++++++++++++++++++++++ uselogin.js | 15 --------------- utils/userUtils.js | 1 - 4 files changed, 26 insertions(+), 31 deletions(-) delete mode 100644 example.js create mode 100644 examples/zhihu.js delete mode 100644 uselogin.js diff --git a/example.js b/example.js deleted file mode 100644 index a61e733..0000000 --- a/example.js +++ /dev/null @@ -1,15 +0,0 @@ -import { launch } from 'puppeteer'; -import 'dotenv/config.js' -import { injectionCookie } from './utils/userUtils'; - -(async () => { - const browser = await launch({ headless: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); - const page = await browser.newPage(); - - // 注入cookie - const webUrl = 'https://www.zhihu.com/hot'; - const cookieFileUrl = './data/zhihu.json'; - await injectionCookie(page, webUrl, cookieFileUrl); - - await browser.close(); -})(); \ No newline at end of file diff --git a/examples/zhihu.js b/examples/zhihu.js new file mode 100644 index 0000000..6c52473 --- /dev/null +++ b/examples/zhihu.js @@ -0,0 +1,26 @@ +import { launch } from 'puppeteer'; +import 'dotenv/config.js' +import { injectionCookie } from '../utils/userUtils.js'; + +(async () => { + const browser = await launch({ headless: false, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); + const page = await browser.newPage(); + + // 注入cookie + const webUrl = 'https://www.zhihu.com/hot'; + const cookieFileUrl = './data/zhihu.json'; + await injectionCookie(page, webUrl, cookieFileUrl); + + const hotItems = await page.$$('.HotList-list .HotItem'); + // console.log(hotItems); + console.log("========"); + for (const hot in hotItems) { + if (Object.hasOwnProperty.call(hotItems, hot)) { + const element = hotItems[hot]; + const textContent = await page.evaluate(node => node.textContent.trim(), element); + console.log(textContent); + } + } + + // await browser.close(); +})(); \ No newline at end of file diff --git a/uselogin.js b/uselogin.js deleted file mode 100644 index 51b54d1..0000000 --- a/uselogin.js +++ /dev/null @@ -1,15 +0,0 @@ -import { launch } from 'puppeteer'; -import fs from 'fs'; -import 'dotenv/config.js'; -import { injectionCookie } from './utils/userUtils.js'; - -(async () => { - const browser = await launch({ headless: false, devtools: true, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); - const page = await browser.newPage(); - - const webUrl = 'https://www.zhihu.com/hot'; - const cookieFileUrl = './data/zhihu.json'; - await injectionCookie(page, webUrl, cookieFileUrl); - - // browser.close(); -})(); \ No newline at end of file diff --git a/utils/userUtils.js b/utils/userUtils.js index 9351ed6..6347fc2 100644 --- a/utils/userUtils.js +++ b/utils/userUtils.js @@ -30,7 +30,6 @@ export async function injectionCookie(page, webUrl, cookieFile) { try { await page.goto(webUrl); const cookies = JSON.parse(fs.readFileSync(cookieFile, 'utf-8')); - console.log(cookies); await page.setCookie(...cookies); await page.goto(webUrl); } catch (error) { From f5538401321cadd5418629b2924840779ee4a618 Mon Sep 17 00:00:00 2001 From: rsgltzyd Date: Tue, 30 Jul 2024 22:18:39 +0800 Subject: [PATCH 5/7] fix --- .gitignore | 1 + examples/zhihu.js | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index d8e98cd..72c9fe7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ node_modules/ businessData.json data/user.json data/zhihu.json +data/zhihuData.json diff --git a/examples/zhihu.js b/examples/zhihu.js index 6c52473..6407a11 100644 --- a/examples/zhihu.js +++ b/examples/zhihu.js @@ -1,6 +1,7 @@ import { launch } from 'puppeteer'; import 'dotenv/config.js' import { injectionCookie } from '../utils/userUtils.js'; +import fs from 'fs'; (async () => { const browser = await launch({ headless: false, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); @@ -12,15 +13,26 @@ import { injectionCookie } from '../utils/userUtils.js'; await injectionCookie(page, webUrl, cookieFileUrl); const hotItems = await page.$$('.HotList-list .HotItem'); - // console.log(hotItems); console.log("========"); + + const hotText = new Array(); + for (const hot in hotItems) { if (Object.hasOwnProperty.call(hotItems, hot)) { const element = hotItems[hot]; - const textContent = await page.evaluate(node => node.textContent.trim(), element); - console.log(textContent); + + const h2Element = await element.$('h2'); + const pElement = await element.$('p'); + + const title = h2Element != null ? await h2Element.evaluate(e => e.textContent) : ''; + const excerpt = pElement != null ? await pElement.evaluate(e => e.textContent) : ''; + + hotText.push({ title, excerpt }); } } + console.log("end..."); - // await browser.close(); + fs.writeFileSync('./data/zhihuData.json', JSON.stringify(hotText, null, 2)); + + await browser.close(); })(); \ No newline at end of file From df3560cc6f80fa3f283b7bed62bc69c3a9469e63 Mon Sep 17 00:00:00 2001 From: wenpeng Date: Wed, 11 Sep 2024 23:13:20 +0800 Subject: [PATCH 6/7] =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E5=B7=A5?= =?UTF-8?q?=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .prettierrc | 13 ++++++++++ examples/zhihu.js | 64 +++++++++++++++++++++++++++++++++------------- package.json | 2 +- utils/userUtils.js | 22 ++++++++-------- 4 files changed, 71 insertions(+), 30 deletions(-) create mode 100644 .prettierrc diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..02d8a0d --- /dev/null +++ b/.prettierrc @@ -0,0 +1,13 @@ +{ + "printWidth": 100, + "tabWidth": 4, + "semi": true, + "singleQuote": false, + "quoteProps": "consistent", + "trailingComma": "es5", + "bracketSpacing": true, + "arrowParens": "avoid", + "proseWrap": "never", + "endOfLine": "lf", + "embeddedLanguageFormatting": "off" +} \ No newline at end of file diff --git a/examples/zhihu.js b/examples/zhihu.js index 6407a11..27773f8 100644 --- a/examples/zhihu.js +++ b/examples/zhihu.js @@ -1,19 +1,26 @@ -import { launch } from 'puppeteer'; -import 'dotenv/config.js' -import { injectionCookie } from '../utils/userUtils.js'; -import fs from 'fs'; +import { launch } from "puppeteer"; +import "dotenv/config.js"; +import { injectionCookie } from "../utils/userUtils.js"; +import fs from "fs"; +import dotenv from "dotenv"; +dotenv.config(); (async () => { - const browser = await launch({ headless: false, args: [`--window-size=1920,1080`], defaultViewport: { width: 1920, height: 1080 } }); + const browser = await launch({ + headless: false, + args: [`--window-size=1920,1080`], + defaultViewport: { width: 1920, height: 1080 }, + }); const page = await browser.newPage(); + page.on("console", msg => console.log("PAGE LOG", msg.text())); + page.on("error", error => console.error("PAGE error", error)); // 注入cookie - const webUrl = 'https://www.zhihu.com/hot'; - const cookieFileUrl = './data/zhihu.json'; + const webUrl = "https://www.zhihu.com/hot"; + const cookieFileUrl = "./data/zhihu.json"; await injectionCookie(page, webUrl, cookieFileUrl); - const hotItems = await page.$$('.HotList-list .HotItem'); - console.log("========"); + const hotItems = await page.$$(".HotList-list .HotItem"); const hotText = new Array(); @@ -21,18 +28,39 @@ import fs from 'fs'; if (Object.hasOwnProperty.call(hotItems, hot)) { const element = hotItems[hot]; - const h2Element = await element.$('h2'); - const pElement = await element.$('p'); + const h2Element = await element.$("h2"); + const pElement = await element.$("p"); - const title = h2Element != null ? await h2Element.evaluate(e => e.textContent) : ''; - const excerpt = pElement != null ? await pElement.evaluate(e => e.textContent) : ''; - - hotText.push({ title, excerpt }); + const title = h2Element != null ? await h2Element.evaluate(e => e.textContent) : ""; + const excerpt = pElement != null ? await pElement.evaluate(e => e.textContent) : ""; + const createTime = Date.now(); + hotText.push({ title, excerpt, createTime }); } } - console.log("end..."); - fs.writeFileSync('./data/zhihuData.json', JSON.stringify(hotText, null, 2)); + fs.writeFileSync("./data/zhihuData.json", JSON.stringify(hotText, null, 2)); + + const url = process.env.URL; + console.log(process.env.URL); + + try { + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(hotText, null, 2), + }); + + if (!response.ok) { + throw new Error(`Http error! status : ${response.status}`); + } + + const responseData = await response.json(); + console.log("Repsonse data", responseData); + } catch (error) { + console.error(error); + } await browser.close(); -})(); \ No newline at end of file +})(); diff --git a/package.json b/package.json index 3d6cedf..0dcb59f 100644 --- a/package.json +++ b/package.json @@ -4,4 +4,4 @@ "dotenv": "^16.4.5", "puppeteer": "^22.13.1" } -} +} \ No newline at end of file diff --git a/utils/userUtils.js b/utils/userUtils.js index 6347fc2..9de59f4 100644 --- a/utils/userUtils.js +++ b/utils/userUtils.js @@ -1,38 +1,38 @@ -import fs from 'fs'; +import fs from "fs"; /** * 设置页面的LocalStorage - * @param {*} page + * @param {*} page * @param {String} file 用户信息 */ export async function setLocalStorage(page, file) { - const outputLocalStorge = fs.readFileSync(file, 'utf-8'); + const outputLocalStorge = fs.readFileSync(file, "utf-8"); let localStorageTest = JSON.parse(outputLocalStorge); // 设置页面的localStorage await page.evaluateOnNewDocument(localStorageTest => { localStorage.clear(); - localStorage.setItem('merchant_access_token', localStorageTest.merchant_access_token); - localStorage.setItem('merchant_refresh_token', localStorageTest.merchant_refresh_token); - localStorage.setItem('merchant_login_data', localStorageTest.merchant_login_data); - localStorage.setItem('merchant_metadata_data', localStorageTest.merchant_metadata_data); + localStorage.setItem("merchant_access_token", localStorageTest.merchant_access_token); + localStorage.setItem("merchant_refresh_token", localStorageTest.merchant_refresh_token); + localStorage.setItem("merchant_login_data", localStorageTest.merchant_login_data); + localStorage.setItem("merchant_metadata_data", localStorageTest.merchant_metadata_data); }, localStorageTest); - console.log('set success'); + console.log("set success"); } /** - * @param {Page} page + * @param {Page} page * @param {String} webUrl 网站 * @param {String} cookieFile cookie地址Url */ export async function injectionCookie(page, webUrl, cookieFile) { try { await page.goto(webUrl); - const cookies = JSON.parse(fs.readFileSync(cookieFile, 'utf-8')); + const cookies = JSON.parse(fs.readFileSync(cookieFile, "utf-8")); await page.setCookie(...cookies); await page.goto(webUrl); } catch (error) { console.error(error); } -} \ No newline at end of file +} From 188d115a26317d60aa5a8292df28c131c7ca1562 Mon Sep 17 00:00:00 2001 From: wenpeng Date: Wed, 11 Sep 2024 23:14:15 +0800 Subject: [PATCH 7/7] fix --- utils/userUtils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/userUtils.js b/utils/userUtils.js index 9de59f4..1100792 100644 --- a/utils/userUtils.js +++ b/utils/userUtils.js @@ -2,7 +2,7 @@ import fs from "fs"; /** * 设置页面的LocalStorage - * @param {*} page + * @param {import('puppeteer').Page} page * @param {String} file 用户信息 */ export async function setLocalStorage(page, file) {