Last active October 18, 2022 23:26
List of all films of any language and year (`/{lang}/{year}`): scrapes Wikipedia to get the list of films for a given language and year.

Heroku App


  • gocolly
  • gorilla mux

Film information is sourced from Wikipedia.

package main
import (
// response is a package-level string buffer for scraped film titles.
// NOTE(review): as package-level mutable state it is visible to every
// concurrent request handler; prefer request-local storage where possible.
var response string
// hello is the handler registered for the root route. It writes the fixed
// greeting "Hello Guys!" followed by a newline to the response and does not
// inspect the request.
func hello(w http.ResponseWriter, r *http.Request) {
	fmt.Fprintln(w, "Hello Guys!")
}
// determineListenAddress builds the listen address from the PORT
// environment variable.
//
// It returns ":" + $PORT when the variable is set. When PORT is unset or
// empty it returns an empty address and a non-nil error, so the caller can
// refuse to start instead of binding to an unintended port.
func determineListenAddress() (string, error) {
	// The value must come from the environment only: overriding it with a
	// fixed port would make the check below unreachable and ignore $PORT.
	port := os.Getenv("PORT")
	if port == "" {
		return "", fmt.Errorf("$PORT not set")
	}
	return ":" + port, nil
}
func main() {
addr, err := determineListenAddress()
if err != nil {
router := mux.NewRouter()
router.HandleFunc("/", hello)
router.HandleFunc("/{lang}/{year}", updateMoviesList)
if err := http.ListenAndServe(addr, router); err != nil {
func updateMoviesList(w http.ResponseWriter, r *http.Request) {
var lang, year string
lang = mux.Vars(r)["lang"]
year = mux.Vars(r)["year"]
response = ""
c := colly.NewCollector()
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
c.OnHTML("table.wikitable", func(e *colly.HTMLElement) {
headers := e.ChildTexts("th")
headersLength := len(headers)
e.ForEach("th", func(_ int, el *colly.HTMLElement) {
h := el.DOM.Text()
if strings.TrimSpace(strings.ToLower(h)) == "cast" {
if headersLength == 8 {
headersLength = headersLength - 1
} else if headersLength <= 5 {
headersLength = headersLength + 1
e.ForEach("tr", func(_ int, ele *colly.HTMLElement) {
elements := ele.ChildTexts("td")
var title string
elemsLength := len(elements)
if elemsLength == headersLength+1 {
title = elements[2]
} else if elemsLength == headersLength {
title = elements[1]
} else if elemsLength == headersLength-1 {
title = elements[0]
if title != "" {
response = response + title + "\n"
c.Visit("" + lang + "_films_of_" + year)
fmt.Fprintln(w, response)
