summaryrefslogtreecommitdiff
path: root/parse.go
blob: 1298fa185f273bf59441787fb65edad145a3abec (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package main

import (
	"bytes"
	"errors"
	"fmt"
	"slices"
	"strconv"
	"strings"

	"golang.org/x/net/html"
)

// parse reads an HTML document from body and returns the flats found in it.
//
// Candidate nodes come from findNodes; each is converted with parseNode, and
// nodes that parseNode rejects are skipped rather than reported. The result
// is sorted with compareID, and adjacent entries that compareID reports as
// equal are collapsed into one. If the HTML cannot be parsed, the error is
// returned together with an empty, non-nil slice.
func parse(body []byte) ([]flat, error) {
	doc, err := html.Parse(bytes.NewReader(body))
	if err != nil {
		return []flat{}, err
	}

	listings := []flat{}
	for _, node := range findNodes(doc) {
		// Best effort: a node without a usable ID or price is ignored.
		if f, err := parseNode(node); err == nil {
			listings = append(listings, f)
		}
	}

	// Sorting first makes duplicates adjacent so CompactFunc can drop them.
	slices.SortFunc(listings, compareID)
	sameID := func(a, b flat) bool { return compareID(a, b) == 0 }
	return slices.CompactFunc(listings, sameID), nil
}

// findNodes walks every descendant of root and collects the <a> elements
// that are marked as listings. An anchor qualifies when either
//
//   - its data-testid attribute equals "property-price" (let), or
//   - its data-test attribute equals "property-header" and it carries a
//     non-empty href attribute (buy).
//
// The returned slice is never nil and preserves document order.
func findNodes(root *html.Node) []*html.Node {
	matches := []*html.Node{}
	for node := range root.Descendants() {
		if node.Type != html.ElementNode || node.Data != "a" {
			continue
		}

		// Let: the data-testid marker alone is sufficient.
		if marker := matchAttr(node, "data-testid"); marker != nil && marker.Val == "property-price" {
			matches = append(matches, node)
			continue
		}

		// Buy: needs the data-test marker plus a non-empty href.
		marker := matchAttr(node, "data-test")
		if marker == nil || marker.Val != "property-header" {
			continue
		}
		if link := matchAttr(node, "href"); link != nil && link.Val != "" {
			matches = append(matches, node)
		}
	}
	return matches
}

// matchAttr looks up the first attribute of n whose Key equals key and
// returns a pointer to a copy of it, or nil when n has no such attribute.
// Only Key is compared. Because the result is a copy, writing through the
// pointer does not modify n.
func matchAttr(n *html.Node, key string) *html.Attribute {
	for i := range n.Attr {
		if n.Attr[i].Key != key {
			continue
		}
		found := n.Attr[i]
		return &found
	}
	return nil
}

// parseNode builds a flat from a listing anchor.
//
// The ID is extracted from root's href attribute via parseID. The price is
// taken from the first descendant text node, in document order, whose
// whitespace-trimmed content either ends in " pcm" (that suffix is removed)
// or starts with "£" (kept as is). Surrounding whitespace is stripped from
// the price in both cases.
//
// An error is returned when root has no href attribute, when parseID cannot
// extract an ID, or when no descendant text node contains a price.
func parseNode(root *html.Node) (flat, error) {
	href := matchAttr(root, "href")
	if href == nil {
		return flat{}, errors.New("Couldn't find URL")
	}
	id, err := parseID(href.Val)
	if err != nil {
		return flat{}, err
	}
	f := flat{id, ""}
	for n := range root.Descendants() {
		// Only text nodes carry visible content; for elements Data is the
		// tag name and for comments it is the comment body.
		if n.Type != html.TextNode {
			continue
		}
		// Markup is commonly indented, so normalise before matching;
		// otherwise "\n  £1,200 pcm\n" would match neither pattern.
		text := strings.TrimSpace(n.Data)
		if price, found := strings.CutSuffix(text, " pcm"); found {
			f.Price = strings.TrimSpace(price)
			return f, nil
		}
		if strings.HasPrefix(text, "£") {
			f.Price = text
			return f, nil
		}
	}
	return flat{}, errors.New("Couldn't find price")
}

// parseID extracts the numeric listing ID from a link path such as
// "/properties/123#/?channel=RES_LET".
//
// A leading "/properties/" and a trailing "#/?channel=RES_LET" or
// "#/?channel=RES_BUY" are stripped when present; whatever remains must be
// accepted by strconv.Atoi. When no variant yields an integer, parseID
// returns -1 and an error that quotes the original path.
func parseID(path string) (int, error) {
	rest := strings.TrimPrefix(path, "/properties/")
	for _, channel := range [...]string{"LET", "BUY"} {
		candidate := strings.TrimSuffix(rest, "#/?channel=RES_"+channel)
		if id, err := strconv.Atoi(candidate); err == nil {
			return id, nil
		}
	}
	return -1, fmt.Errorf("Couldn't extract ID from %q", path)
}