Skip to content

Commit

Permalink
Merge pull request #14 from austin1237/cache
Browse files Browse the repository at this point in the history
dynamo cache
  • Loading branch information
austin1237 authored Mar 3, 2024
2 parents c867722 + cbb6388 commit 1c7cae0
Show file tree
Hide file tree
Showing 19 changed files with 465 additions and 29 deletions.
3 changes: 2 additions & 1 deletion go.work.sum
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
65 changes: 65 additions & 0 deletions scraper/cache/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package cache

import (
"scraper/job"
)

// Table is the storage backend that Cache delegates to. It is declared here,
// next to its consumer, so that tests can substitute a mock implementation.
type Table interface {
// ReadItem looks up a single company and returns a string plus an error.
// Cache treats an empty string result as "company is not in the cache".
ReadItem(company string) (string, error)
// WriteItems stores the given companies. It has no return value, so any
// failure handling is left to the implementation.
WriteItems(companies []string)
}

// Cache filters and records job companies using an underlying Table.
type Cache struct {
// table is the backing store used for all reads and writes.
table Table
}

// NewCache builds a Cache that reads from and writes to the supplied table.
func NewCache(table Table) *Cache {
	c := new(Cache)
	c.table = table
	return c
}

// FilterCachedCompanies returns the jobs whose company is not present in the
// cache, in the same relative order as they appear in jobs.
//
// Every job is looked up concurrently via Table.ReadItem. A lookup that
// returns an empty string (and no error) means the company is not cached.
// If any lookup fails, the first error received is returned and the result
// slice is nil; a failed lookup is never reported as "not cached".
func (c *Cache) FilterCachedCompanies(jobs []job.Job) ([]job.Job, error) {
	// lookupResult carries the outcome of one ReadItem call back to the
	// collector, tagged with the job's position so order can be preserved.
	type lookupResult struct {
		index  int
		cached bool
		err    error
	}

	// Buffered to len(jobs) so every goroutine can deliver its single result
	// and exit even when we return early on an error.
	results := make(chan lookupResult, len(jobs))

	for i, newJob := range jobs {
		go func(index int, company string) {
			item, err := c.table.ReadItem(company)
			// Exactly one message per job: the error (if any) travels with
			// the result instead of racing it on a separate channel.
			results <- lookupResult{index: index, cached: item != "", err: err}
		}(i, newJob.Company)
	}

	// Collect exactly one result per job.
	cached := make([]bool, len(jobs))
	for range jobs {
		res := <-results
		if res.err != nil {
			return nil, res.err
		}
		cached[res.index] = res.cached
	}

	notInCache := make([]job.Job, 0, len(jobs))
	for i, candidate := range jobs {
		if !cached[i] {
			notInCache = append(notInCache, candidate)
		}
	}
	return notInCache, nil
}

// WriteCompaniesToCache extracts the Company field of every job, keeping the
// input order, and hands the resulting list to the table in a single
// WriteItems call.
func (c *Cache) WriteCompaniesToCache(jobs []job.Job) {
	names := make([]string, len(jobs))
	for i := range jobs {
		names[i] = jobs[i].Company
	}
	c.table.WriteItems(names)
}
63 changes: 63 additions & 0 deletions scraper/cache/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package cache

import (
"scraper/job"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)

// MockTable is a testify-based test double for the Table interface.
type MockTable struct {
// Embedding mock.Mock provides the On/Called/AssertExpectations machinery.
mock.Mock
}

// ReadItem records the call and replays the string and error configured for
// this company through On("ReadItem", ...).Return(...).
func (m *MockTable) ReadItem(company string) (string, error) {
	call := m.Called(company)
	value, err := call.String(0), call.Error(1)
	return value, err
}

// WriteItems records the call so tests can assert on the companies passed in.
// The interface method returns nothing, so the recorded arguments are dropped.
func (m *MockTable) WriteItems(companies []string) {
	_ = m.Called(companies)
}

// TestFilterCachedCompanies checks that a company the table reports as
// present is dropped, while a company with an empty lookup result is kept.
func TestFilterCachedCompanies(t *testing.T) {
	const (
		cachedCompany   = "Acme Corp"
		uncachedCompany = "Globex Corporation"
	)

	// Arrange: one hit (non-empty result) and one miss (empty result).
	table := &MockTable{}
	table.On("ReadItem", cachedCompany).Return(cachedCompany, nil)
	table.On("ReadItem", uncachedCompany).Return("", nil)

	subject := &Cache{table: table}
	input := []job.Job{
		{Company: cachedCompany},
		{Company: uncachedCompany},
	}

	// Act.
	got, err := subject.FilterCachedCompanies(input)

	// Assert: only the uncached company survives the filter.
	assert.NoError(t, err)
	assert.Len(t, got, 1)
	assert.Equal(t, uncachedCompany, got[0].Company)

	table.AssertExpectations(t)
}

// TestWriteCompaniesToCache checks that the company names of all jobs are
// forwarded to the table, in order, in a single WriteItems call.
func TestWriteCompaniesToCache(t *testing.T) {
	wantCompanies := []string{"Acme Corp", "Globex Corporation"}

	// Arrange: expect exactly this slice of names.
	table := &MockTable{}
	table.On("WriteItems", wantCompanies).Return()

	subject := &Cache{table: table}
	input := []job.Job{
		{Company: "Acme Corp"},
		{Company: "Globex Corporation"},
	}

	// Act.
	subject.WriteCompaniesToCache(input)

	// Assert.
	table.AssertExpectations(t)
}
98 changes: 98 additions & 0 deletions scraper/dynamo/dynamo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package dynamo

import (
"log"
"strconv"
"strings"
"sync"
"time"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/dynamodb"
)

// DynamoDBAPI is the subset of DynamoDB client operations that Table uses.
// Declaring it as an interface lets tests inject a mock in place of the
// client created by dynamodb.New.
type DynamoDBAPI interface {
// UpdateItem is used by Table.WriteItems to set an item's expiration time.
UpdateItem(input *dynamodb.UpdateItemInput) (*dynamodb.UpdateItemOutput, error)
// GetItem is used by Table.ReadItem to look up an item by its "company" key.
GetItem(input *dynamodb.GetItemInput) (*dynamodb.GetItemOutput, error)
}

// Table reads and writes company entries in a single DynamoDB table.
type Table struct {
// Name is the DynamoDB table name sent with every request.
Name string
// svc is the DynamoDB client (or a test double) used to issue requests.
svc DynamoDBAPI
}

// NewTable creates a Table for the named DynamoDB table, backed by a client
// built from a new AWS session configured for the given region. It returns
// an error if the session cannot be created.
func NewTable(name string, region string) (*Table, error) {
	cfg := aws.Config{Region: aws.String(region)}

	sess, err := session.NewSession(&cfg)
	if err != nil {
		return nil, err
	}

	table := &Table{
		Name: name,
		svc:  dynamodb.New(sess),
	}
	return table, nil
}

// ReadItem fetches the item whose "company" key equals the lower-cased
// company name. It returns the stored "company" string when an item exists,
// an empty string with a nil error when the response carries no item, and
// the GetItem error otherwise.
func (t *Table) ReadItem(company string) (string, error) {
	// Keys are stored lower-cased, so normalise before looking up.
	key := map[string]*dynamodb.AttributeValue{
		"company": {S: aws.String(strings.ToLower(company))},
	}

	out, err := t.svc.GetItem(&dynamodb.GetItemInput{
		TableName: aws.String(t.Name),
		Key:       key,
	})

	switch {
	case err != nil:
		return "", err
	case out.Item == nil:
		// No item in the response: the company is not stored.
		return "", nil
	}

	return *out.Item["company"].S, nil
}

// WriteItems upserts one item per company, keyed by the lower-cased company
// name, and sets its ExpirationTime attribute to 30 days from now (Unix
// epoch seconds). ExpirationTime is presumably the table's TTL attribute —
// confirm against the table definition.
//
// Each company is written in its own goroutine and the call blocks until all
// writes have finished. Write failures are logged rather than returned, so a
// failed write only means the company is not cached.
func (t *Table) WriteItems(companies []string) {
	// Exactly 30 days. time.Time.AddDate(0, 1, 0) adds a calendar month
	// instead, which is 28-31 days and normalises overflowing dates (for
	// example Jan 31 becomes early March), so it does not give a fixed window.
	const ttl = 30 * 24 * time.Hour

	// The expiry is the same for every item, so format it once up front.
	expirationTime := strconv.FormatInt(time.Now().Add(ttl).Unix(), 10)

	var wg sync.WaitGroup

	// Write each company to the table in a separate goroutine.
	for _, company := range companies {
		wg.Add(1)
		go func(company string) {
			defer wg.Done()

			input := &dynamodb.UpdateItemInput{
				ExpressionAttributeValues: map[string]*dynamodb.AttributeValue{
					":expirationTime": {
						N: aws.String(expirationTime),
					},
				},
				TableName: aws.String(t.Name),
				Key: map[string]*dynamodb.AttributeValue{
					"company": {
						S: aws.String(strings.ToLower(company)),
					},
				},
				ReturnValues:     aws.String("UPDATED_NEW"),
				UpdateExpression: aws.String("set ExpirationTime = :expirationTime"),
			}

			if _, err := t.svc.UpdateItem(input); err != nil {
				log.Println("Error writing company to cache", err)
			}
		}(company)
	}

	// Wait for all goroutines to finish.
	wg.Wait()
}
52 changes: 52 additions & 0 deletions scraper/dynamo/dynamo_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package dynamo

import (
"testing"

"github.com/aws/aws-sdk-go/service/dynamodb"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)

// MockDynamoDB is a testify-based test double for the DynamoDBAPI interface.
type MockDynamoDB struct {
// Embedding mock.Mock provides the On/Called/AssertExpectations machinery.
mock.Mock
}

// UpdateItem records the call and replays the output and error configured
// through On("UpdateItem", ...).Return(...).
//
// A nil first return value is passed through as a nil output, so tests can
// simulate failures with Return(nil, err). A non-nil value of the wrong type
// still panics, which surfaces a mis-configured expectation.
func (m *MockDynamoDB) UpdateItem(input *dynamodb.UpdateItemInput) (*dynamodb.UpdateItemOutput, error) {
	args := m.Called(input)

	var out *dynamodb.UpdateItemOutput
	if v := args.Get(0); v != nil {
		out = v.(*dynamodb.UpdateItemOutput)
	}
	return out, args.Error(1)
}

// GetItem records the call and replays the output and error configured
// through On("GetItem", ...).Return(...).
//
// A nil first return value is passed through as a nil output, so tests can
// simulate failures with Return(nil, err). A non-nil value of the wrong type
// still panics, which surfaces a mis-configured expectation.
func (m *MockDynamoDB) GetItem(input *dynamodb.GetItemInput) (*dynamodb.GetItemOutput, error) {
	args := m.Called(input)

	var out *dynamodb.GetItemOutput
	if v := args.Get(0); v != nil {
		out = v.(*dynamodb.GetItemOutput)
	}
	return out, args.Error(1)
}

// TestNewTable checks that constructing a Table for a table name and region
// succeeds and yields a non-nil value.
func TestNewTable(t *testing.T) {
	const (
		tableName = "test"
		region    = "us-west-2"
	)

	got, err := NewTable(tableName, region)

	assert.NoError(t, err)
	assert.NotNil(t, got)
}

// TestReadItem checks that ReadItem returns no error when GetItem responds
// with an empty output (no item), and that GetItem was actually called.
func TestReadItem(t *testing.T) {
	// Arrange: any GetItem call yields an output without an item.
	svc := &MockDynamoDB{}
	svc.On("GetItem", mock.Anything).Return(&dynamodb.GetItemOutput{}, nil)
	subject := &Table{Name: "test", svc: svc}

	// Act.
	_, err := subject.ReadItem("Acme Corp")

	// Assert.
	assert.NoError(t, err)
	svc.AssertExpectations(t)
}

// TestWriteItems checks that writing companies goes through UpdateItem on
// the underlying client and completes when every call succeeds.
func TestWriteItems(t *testing.T) {
	// Arrange: every UpdateItem call succeeds with an empty output.
	svc := &MockDynamoDB{}
	svc.On("UpdateItem", mock.Anything).Return(&dynamodb.UpdateItemOutput{}, nil)
	subject := &Table{Name: "test", svc: svc}

	companies := []string{"Acme Corp", "Globex Corporation"}

	// Act.
	subject.WriteItems(companies)

	// Assert.
	svc.AssertExpectations(t)
}
4 changes: 3 additions & 1 deletion scraper/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ go 1.20
require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/aws/aws-lambda-go v1.45.0
github.com/aws/aws-sdk-go v1.50.29
github.com/stretchr/testify v1.7.2
)

require (
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/objx v0.1.0 // indirect
golang.org/x/net v0.7.0 // indirect
golang.org/x/net v0.17.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
11 changes: 10 additions & 1 deletion scraper/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@ github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x0
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/aws/aws-lambda-go v1.45.0 h1:3xS35Dlc8ffmcwfcKTyqJGiMuL0UDvkQaVUrI5yHycI=
github.com/aws/aws-lambda-go v1.45.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A=
github.com/aws/aws-sdk-go v1.50.29 h1:Ol2FYzesF2tsQrgVSnDWRFI60+FsSqKKdt7MLlZKubc=
github.com/aws/aws-sdk-go v1.50.29/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
Expand All @@ -21,8 +27,9 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand All @@ -46,5 +53,7 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Loading

0 comments on commit 1c7cae0

Please sign in to comment.