import { of, fromEvent, EMPTY, pipe, timer } from 'rxjs';
import { map, toArray, switchMap, expand, mapTo, take } from 'rxjs/operators';

/**
 * Requirement: run a crawl task that, after fetching the first page, decides from the
 * page content whether to crawl the following n pages (or stop), and finally emits the
 * merged result of every crawled page.
 */
// Click events on the document trigger a new crawl run.
const clicks = fromEvent(document, 'click');

// Mock page contents; pages that no longer match the expected format stop the crawl.
const mockPageContent = [
  `WebContent: page 1`,
  `WebContent: page 2`,
  `WebContent: page 3`,
  `WebContent: page 4`,
  `Wrong Page 1`,
  `Wrong Page 2`
];

// Simulate an AJAX request that resolves with the page content after 500 ms.
const mockAjax = (pageIndex: number) => timer(500).pipe(
  mapTo(mockPageContent[pageIndex - 1]),
  take(1)
);
// Operator: crawl a single page and pair its content with the page index.
const crawl = (pageIndex: number) => pipe(
  switchMap(() => mockAjax(pageIndex)),
  map((content: string) => ({ index: pageIndex, content }))
);

// Observable that crawls exactly one page.
const crawl$ = (pageIndex: number) => of(pageIndex).pipe(
  crawl(pageIndex)
);

// Keep crawling as long as the page looks like real content.
const keepCrawl = (content: string) => /WebContent/.test(content);

// Crawl from startIndex, recursively expanding to the next page while
// keepCrawl holds, then collect every crawled page into a single array.
const crawlRequest$ = (startIndex: number) => of(startIndex).pipe(
  crawl(startIndex),
  expand(x => keepCrawl(x.content) ? crawl$(x.index + 1) : EMPTY),
  toArray()
);
// Each click starts a fresh crawl from page 1 and logs the collected pages.
const triggerCrawl$ = clicks.pipe(
  switchMap(() => crawlRequest$(1))
);

triggerCrawl$.subscribe(console.log);
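
With the mock data above, each click should, after roughly 2.5 seconds of simulated 500 ms requests, log a single array containing pages 1 through 5; the last entry is the first non-matching page, because expand re-emits each value before deciding whether to keep crawling. A rough sketch of that output:

// Expected console output per click (a sketch based on the mock data above):
// [
//   { index: 1, content: 'WebContent: page 1' },
//   { index: 2, content: 'WebContent: page 2' },
//   { index: 3, content: 'WebContent: page 3' },
//   { index: 4, content: 'WebContent: page 4' },
//   { index: 5, content: 'Wrong Page 1' }
// ]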