Skip to content

Commit

Permalink
pg: avoid hang on orphaned transcation panic
Browse files Browse the repository at this point in the history
When a test or an application panics due to an orphaned transaction
when trying to close the DB.  This change avoids the hang by
automatically rolling back the orphaned transaction and closing the DB
before the explicit panic.

For example:

2024-12-05 16:07:23.377 [INF] NODE1: Consensus message received {type=block_proposal sender=0281c655d760af1b41e47fa5614679d771eee25a6427ce797218517e210f475bab}
2024-12-05 16:07:23.377 [INF] NODE1: Already processing the block proposal {height=1}
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:573: expected block hash 73f4a3e53c41fa8a06f05fafb0401228b991a5600f1a043ab9ee7c3c44ff409c, got 8d301bb8b1ce1e7d208ea06dd783ef8f5e6d314c4979b3d93ec5f55807a4714b
    engine_test.go:570:
        	Error Trace:	/home/jon/kwil/git/kwil-db/node/consensus/engine_test.go:570
        	Error:      	Condition never satisfied
        	Test:       	TestValidatorStateMachine/MultipleBlockProposals
2024-12-05 16:07:24.878 [INF] NODE1: Shutting down the consensus engine
--- FAIL: TestValidatorStateMachine (2.98s)
    --- PASS: TestValidatorStateMachine/BlkPropAndCommit (0.54s)
    --- PASS: TestValidatorStateMachine/InvalidAppHash (0.51s)
    --- FAIL: TestValidatorStateMachine/MultipleBlockProposals (1.93s)
panic: already in a tx
	panic: Closed the DB with an active transaction! [recovered]
	panic: Closed the DB with an active transaction!

goroutine 356 [running]:
testing.tRunner.func1.2({0xccb200, 0x10295e0})
	/home/jon/go123/src/testing/testing.go:1632 +0x230
testing.tRunner.func1()
	/home/jon/go123/src/testing/testing.go:1635 +0x35e
panic({0xccb200?, 0x10295e0?})
	/home/jon/go123/src/runtime/panic.go:785 +0x132
github.com/kwilteam/kwil-db/node/pg.(*DB).Close(0xc0000bb980)
	/home/jon/kwil/git/kwil-db/node/pg/db.go:277 +0xc5
panic({0xccb200?, 0x10295f0?})
	/home/jon/go123/src/runtime/panic.go:785 +0x132
github.com/kwilteam/kwil-db/node/pg.(*DB).AutoCommit(0x1f?, 0xac?)
	/home/jon/kwil/git/kwil-db/node/pg/db.go:304 +0xb1
github.com/kwilteam/kwil-db/node/consensus.generateTestCEConfig.func1(0xc0000bb980)
	/home/jon/kwil/git/kwil-db/node/consensus/engine_test.go:83 +0x1d
github.com/kwilteam/kwil-db/node/pg/test.NewTestDBNamed.func2()
	/home/jon/kwil/git/kwil-db/node/pg/test/testdb.go:116 +0x47
testing.(*common).Cleanup.func1()
	/home/jon/go123/src/testing/testing.go:1176 +0x109
testing.(*common).runCleanup(0xc00045c000, 0xc0001df340?)
	/home/jon/go123/src/testing/testing.go:1354 +0xd5
testing.tRunner.func2()
	/home/jon/go123/src/testing/testing.go:1684 +0x25
runtime.Goexit()
	/home/jon/go123/src/runtime/panic.go:629 +0x5e
testing.(*common).FailNow(0xc00045c000)
	/home/jon/go123/src/testing/testing.go:1006 +0x4a
github.com/stretchr/testify/require.Eventually({0x102ffb8, 0xc00045c000}, 0xc000200700, 0x59682f00, 0x5f5e100, {0x0, 0x0, 0x0})
	/home/jon/go/pkg/mod/github.com/stretchr/testify@v1.10.0/require/require.go:401 +0xe4
github.com/kwilteam/kwil-db/node/consensus.TestValidatorStateMachine.func66(0xc00045c000)
	/home/jon/kwil/git/kwil-db/node/consensus/engine_test.go:570 +0x315
testing.tRunner(0xc00045c000, 0xc0003e4410)
	/home/jon/go123/src/testing/testing.go:1690 +0xf4
created by testing.(*T).Run in goroutine 7
	/home/jon/go123/src/testing/testing.go:1743 +0x390
exit status 2
FAIL	github.com/kwilteam/kwil-db/node/consensus	3.008s
  • Loading branch information
jchappelow committed Dec 5, 2024
1 parent 2d7128d commit ba125c4
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions node/pg/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,16 @@ func (db *DB) EnsureFullReplicaIdentityDatasets(ctx context.Context) error {
func (db *DB) Close() error {
db.cancel(nil)
db.repl.stop()
if db.tx != nil {
// This is a bug, so we are not going to panic so the issue is not
// easily ignored, but we will rollback the tx so we don't hang or leak
// postgresql resources.
db.tx.Rollback(context.Background())
db.tx = nil
db.txid = ""
db.pool.Close()
panic("Closed the DB with an active transaction!")
}
return db.pool.Close()
}

Expand Down

0 comments on commit ba125c4

Please sign in to comment.