-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.go
127 lines (101 loc) · 3.12 KB
/
scraper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package main
import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"sync"
"time"
web_scrape2 "web-scape-go/web-scrape"
"github.com/chromedp/chromedp"
)
// ticketNames lists the NLB lottery slugs to scrape; each is appended to
// baseURL to form the results page URL for that lottery.
var ticketNames = []string{
"govisetha",
"mahajana-sampatha",
"dhana-nidhanaya",
"mega-power",
"lucky-7",
"handahana",
}
// baseURL is the NLB results page prefix; a ticket slug is appended to it.
const baseURL = "https://www.nlb.lk/results/"
// CombinedResults aggregates every scraped ticket's parsed results, keyed by
// ticket slug, for serialization into a single JSON file.
type CombinedResults struct {
Tickets map[string]interface{} `json:"tickets"`
}
// scrapeTicket fetches the results page for a single lottery ticket with a
// headless browser, parses the HTML, and sends the parsed results (keyed by
// ticket name) on resultsChan. On any scrape error it logs and returns
// without sending, so a failed ticket is simply absent from the output.
func scrapeTicket(ticketName string, wg *sync.WaitGroup, resultsChan chan<- map[string]interface{}) {
	// Signal completion to the coordinating WaitGroup on every exit path.
	defer wg.Done()

	// Bound the entire scrape (navigation, render, extraction) to 15 seconds.
	timeoutCtx, cancelTimeout := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancelTimeout()

	// Fresh ChromeDP browser context, governed by the timeout above.
	browserCtx, cancelBrowser := chromedp.NewContext(timeoutCtx)
	defer cancelBrowser()

	// Navigate to the ticket's results page and capture the rendered HTML
	// once the results table is visible (the signal the page has loaded).
	var pageHTML string
	if err := chromedp.Run(browserCtx,
		chromedp.Navigate(baseURL+ticketName),
		chromedp.WaitVisible("table.tbl", chromedp.ByQuery),
		chromedp.OuterHTML("html", &pageHTML),
	); err != nil {
		log.Printf("Failed to scrape %s: %v", ticketName, err)
		return
	}

	// Parse the raw HTML into structured results and hand them to the collector.
	resultsChan <- map[string]interface{}{ticketName: web_scrape2.NlbLotteryResults(pageHTML)}
	log.Printf("Successfully scraped results for %s", ticketName)
}
// main scrapes every configured ticket concurrently, merges the per-ticket
// results into a single CombinedResults value, writes it as pretty-printed
// JSON to results/all_tickets.json, and prints the total execution time.
func main() {
	startTime := time.Now() // basis for the elapsed-time report at the end

	// Ensure the output directory exists before any results need writing.
	if err := os.MkdirAll("results", 0755); err != nil {
		log.Fatalf("Failed to create results directory: %v", err)
	}

	// Aggregate structure the collection loop below fills in.
	combinedResults := CombinedResults{
		Tickets: make(map[string]interface{}),
	}

	// One buffered slot per ticket so no scraper goroutine blocks on send,
	// and a WaitGroup to learn when all of them have finished.
	var wg sync.WaitGroup
	resultsChan := make(chan map[string]interface{}, len(ticketNames))

	// Launch one scraper goroutine per ticket.
	for _, ticketName := range ticketNames {
		wg.Add(1)
		go scrapeTicket(ticketName, &wg, resultsChan)
	}

	// Close the channel once every scraper is done so the range loop
	// below terminates.
	go func() {
		wg.Wait()
		close(resultsChan)
	}()

	// Merge each goroutine's single-entry map into the combined structure.
	for result := range resultsChan {
		for ticketName, ticketData := range result {
			combinedResults.Tickets[ticketName] = ticketData
		}
	}

	// Persist the combined results to a single JSON file.
	combinedFilePath := filepath.Join("results", "all_tickets.json")
	combinedJSON, err := json.MarshalIndent(combinedResults, "", " ")
	if err != nil {
		log.Fatalf("Failed to marshal combined JSON: %v", err)
	}
	if err = os.WriteFile(combinedFilePath, combinedJSON, 0644); err != nil {
		log.Fatalf("Failed to write combined JSON to file: %v", err)
	}
	log.Printf("Combined results saved to %s", combinedFilePath)

	// time.Since is the idiomatic replacement for the manual Now/Sub pair.
	fmt.Printf("Execution time: %s\n", time.Since(startTime))
}