Skip to content

Instantly share code, notes, and snippets.

@waynewaynetsai
Last active April 17, 2020 17:30
Show Gist options
  • Save waynewaynetsai/64af7f7526298e68facdf7f103bfd580 to your computer and use it in GitHub Desktop.
Save waynewaynetsai/64af7f7526298e68facdf7f103bfd580 to your computer and use it in GitHub Desktop.
import { of, fromEvent, empty, pipe, Observable, timer } from 'rxjs';
import { ajax } from 'rxjs/ajax';
import { map, toArray, switchMap } from 'rxjs/operators';
import * as rx from 'rxjs';
import { expand, mapTo, delay, take, tap, takeWhile } from 'rxjs/operators';
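/**
 * Requirement: run a crawl task that fetches the first page and then, depending
 * on that page's content, fetches the following n pages (or none at all).
 * Finally, return the combined data from every page that was fetched.
 */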
// Observable of `click` events on the document.
// Not referenced anywhere else in the visible code; the trigger stream further
// down builds its own click observable instead of reusing this one.
const clicks = fromEvent(document, 'click');
/**
 * Canned page bodies standing in for real responses.
 * Position 0 holds page 1, position 1 holds page 2, and so on. The first four
 * entries contain the text `WebContent`; the last two do not.
 */
const mockPageContent = [
  'WebContent: page 1',
  'WebContent: page 2',
  'WebContent: page 3',
  'WebContent: page 4',
  'Wrong Page 1',
  'Wrong Page 2',
];
/**
 * Simulates a page request: once a 500 ms timer fires, emits the mock content
 * for the requested page.
 *
 * @param pageIndex 1-based page number; it is shifted by one to index into
 *   `mockPageContent`. A number with no matching entry yields `undefined`.
 * @returns An observable capped by `take(1)` at a single emission.
 */
const mockAjax = (pageIndex) => {
  // Look the content up first; `mapTo` then emits this fixed value no matter
  // what the timer itself emits.
  const content = mockPageContent[pageIndex - 1];
  return timer(500).pipe(
    mapTo(content),
    take(1),
  );
};
/**
 * Builds a pipeable operator that turns each upstream page number into a
 * `{ index, content }` record.
 *
 * Two separate values are involved: the page that gets fetched is whatever the
 * upstream observable emits, while `index` on the result is always the
 * `pageIndex` argument given here. The callers in this file pass the same
 * number for both.
 *
 * @param pageIndex Page number recorded as `index` on every emitted record.
 * @returns An operator for use inside `.pipe(...)`.
 */
const crawl = (pageIndex: number) => {
  // Fetch the page named by the upstream value (distinct from `pageIndex`).
  const fetchContent = switchMap((upstreamIndex) => mockAjax(upstreamIndex));
  // Label the fetched content with the page number supplied by the caller.
  const tagWithIndex = map((content) => ({ index: pageIndex, content }));
  return pipe(fetchContent, tagWithIndex);
};
/**
 * Crawls a single page.
 *
 * @param pageIndex Page number to fetch; it is both the value pushed through
 *   the `crawl` operator and the `index` stored on the result.
 * @returns An observable produced by piping `of(pageIndex)` through `crawl`.
 */
const crawl$ = (pageIndex: number) => {
  const fetchPage = crawl(pageIndex);
  return of(pageIndex).pipe(fetchPage);
};
/**
 * Decides whether crawling should continue past a page.
 *
 * @param content Body of the page that was just fetched.
 * @returns `true` when the body contains the case-sensitive text `WebContent`.
 */
const keepCrawl = (content: string) => {
  // A regex test is used on purpose: it coerces its argument to a string, so a
  // missing (`undefined`) body evaluates to `false` instead of throwing.
  const webContentMarker = /WebContent/;
  return webContentMarker.test(content);
};
/**
 * Crawls pages one after another starting at `startIndex` and collects every
 * fetched page into a single array.
 *
 * The first page is fetched through `crawl$`, the same helper used for all
 * follow-up pages. `expand` then feeds each emitted page back in: while
 * `keepCrawl` accepts the page's content the next page number is requested,
 * otherwise an empty observable ends the recursion. The page that fails the
 * `keepCrawl` check has already been emitted at that point, so it is still
 * part of the collected array.
 *
 * @param startIndex Page number of the first page to crawl.
 * @returns An observable emitting one array of `{ index, content }` records,
 *   in crawl order, once the recursion has finished.
 */
const crawlRequest$ = (startIndex: number) => crawl$(startIndex).pipe(
  expand((page) => (keepCrawl(page.content) ? crawl$(page.index + 1) : empty())),
  toArray(),
);
/**
 * Starts a crawl from page 1 on every document click.
 * `switchMap` is used, so a new click replaces a crawl that is still running
 * rather than running alongside it.
 */
const triggerCrawl$ = fromEvent(document, 'click').pipe(
  switchMap(() => crawlRequest$(1)),
);

// Subscribing activates the click listener; each finished crawl is logged.
triggerCrawl$.subscribe((pages) => console.log(pages));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment