From 2c525c56625e4ae9429147d17808b5422353562c Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Sat, 26 Jul 2025 20:12:44 +0100 Subject: Properly extend parser to parse properties for sale Previous commit had only test files, but not actual code changes. --- parse.go | 30 +++++++++++++++++++++++------- parse_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/parse.go b/parse.go index a5ee3ce..1298fa1 100644 --- a/parse.go +++ b/parse.go @@ -40,9 +40,18 @@ func findNodes(root *html.Node) []*html.Node { if n.Data != "a" { continue } + // Try to match attribute for let. attr := matchAttr(n, "data-testid") if attr == nil || attr.Val != "property-price" { - continue + // If unsuccessful, try to match attribute for buy. + attr = matchAttr(n, "data-test") + if attr == nil || attr.Val != "property-header" { + continue + } + href := matchAttr(n, "href") + if href == nil || href.Val == "" { + continue + } } flats = append(flats, n) } @@ -73,17 +82,24 @@ func parseNode(root *html.Node) (flat, error) { f.Price = price return f, nil } + if strings.HasPrefix(n.Data, "£") { + f.Price = strings.TrimSpace(n.Data) + return f, nil + } } return flat{}, errors.New("Couldn't find price") } func parseID(path string) (int, error) { s, _ := strings.CutPrefix(path, "/properties/") - maybeID, _ := strings.CutSuffix(s, "#/?channel=RES_LET") - ID, err := strconv.Atoi(maybeID) - if err != nil { - err := fmt.Errorf("Couldn't extract ID from %q", path) - return -1, err + for _, channel := range []string{"LET", "BUY"} { + suffix := fmt.Sprintf("#/?channel=RES_%v", channel) + maybeID, _ := strings.CutSuffix(s, suffix) + ID, err := strconv.Atoi(maybeID) + if err == nil { + return ID, nil + } } - return ID, err + err := fmt.Errorf("Couldn't extract ID from %q", path) + return -1, err } diff --git a/parse_test.go b/parse_test.go index 280e8a4..59c14ed 100644 --- a/parse_test.go +++ b/parse_test.go @@ -66,3 +66,35 @@ func TestParseDulicates(t *testing.T) { t.Errorf("Parse failed: got: %v, want: %v", got, want) } } + +func TestParseBuy(t *testing.T) { + filename := "2025-03-31-buy.html" + data, err := os.ReadFile(filepath.Join("testdata", filename)) + if err != nil { + t.Fatalf("Could not read %v", filename) + } + want := []flat{ + flat{ + ID: 158566946, + Price: "£900,000", + }, + flat{ + ID: 160016081, + Price: "£500,000", + }, + flat{ + ID: 160019057, + Price: "£575,000", + }, + flat{ + ID: 160020590, + Price: "£400,000", + }} + got, err := parse(data) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(got, want) { + t.Errorf("Parse failed: got: %v, want: %v", got, want) + } +} -- cgit v1.2.3-70-g09d2