Skip to content

Commit

Permalink
Simplify usage (#54)
Browse files Browse the repository at this point in the history
* Simplify usage
* Add root foreach callback.
* Simplify path specification for FindElement
* Add buger/jsonparser benchmark. Crazy fast.
  • Loading branch information
klauspost authored Dec 2, 2021
1 parent d95f712 commit 2455af9
Show file tree
Hide file tree
Showing 11 changed files with 321 additions and 158 deletions.
177 changes: 94 additions & 83 deletions README.md

Large diffs are not rendered by default.

49 changes: 42 additions & 7 deletions benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"encoding/json"
"testing"

"github.com/buger/jsonparser"
jsoniter "github.com/json-iterator/go"
)

Expand Down Expand Up @@ -166,31 +167,31 @@ func BenchmarkJsonParserLarge(b *testing.B) {
b.Fatal(err)
}
iter := pj.Iter()
elem, err = iter.FindElement("users", elem)
elem, err = iter.FindElement(elem, "users")
if checkErrs && err != nil {
b.Fatal(err)
}
ar, err = elem.Iter.Array(ar)
if checkErrs && err != nil {
b.Fatal(err)
}
ar.ForEach(func(t Type, i Iter) {
elem, err = i.FindElement("username", elem)
ar.ForEach(func(i Iter) {
elem, err = i.FindElement(elem, "username")
if checkErrs && err != nil {
b.Fatal(err)
}
_, _ = elem.Iter.StringBytes()
})

elem, err = iter.FindElement("topics/topics", elem)
elem, err = iter.FindElement(elem, "topics", "topics")
if checkErrs && err != nil {
b.Fatal(err)
}
ar, err = elem.Iter.Array(ar)
if checkErrs && err != nil {
b.Fatal(err)
}
ar.ForEach(func(t Type, i Iter) {
ar.ForEach(func(i Iter) {
if true {
// Use foreach...
obj, err = i.Object(obj)
Expand All @@ -213,13 +214,13 @@ func BenchmarkJsonParserLarge(b *testing.B) {

}, onlyKeys)
} else {
elem, err = i.FindElement("id", elem)
elem, err = i.FindElement(elem, "id")
if checkErrs && err != nil {
b.Fatal(err)
}
_, _ = elem.Iter.Int()
//b.Log(elem.Iter.Int())
elem, err = i.FindElement("slug", elem)
elem, err = i.FindElement(elem, "slug")
if checkErrs && err != nil {
b.Fatal(err)
}
Expand All @@ -230,3 +231,37 @@ func BenchmarkJsonParserLarge(b *testing.B) {
}
})
}

// BenchmarkBugerJsonParserLarge runs github.com/buger/jsonparser over the
// "payload-large" fixture. Per iteration it reads "username" from every entry
// of the "users" array, and "id" plus "slug" from every entry of the array at
// "topics" -> "topics".
func BenchmarkBugerJsonParserLarge(b *testing.B) {
	// Flip to true to log the extracted values during the first iteration.
	const logVals = false

	payload := loadCompressed(b, "payload-large")
	b.SetBytes(int64(len(payload)))
	b.ReportAllocs()
	b.ResetTimer()

	// sink collects a contribution from every extracted value so the results
	// of the lookups are actually consumed.
	var sink int
	for n := 0; n < b.N; n++ {
		// The return values of ArrayEach and the lookup errors are discarded.
		jsonparser.ArrayEach(payload, func(user []byte, _ jsonparser.ValueType, _ int, _ error) {
			name, _, _, _ := jsonparser.Get(user, "username")
			if logVals && n == 0 {
				b.Log(string(name))
			}
			sink += len(name)
		}, "users")

		jsonparser.ArrayEach(payload, func(topic []byte, _ jsonparser.ValueType, _ int, _ error) {
			id, _ := jsonparser.GetInt(topic, "id")
			if logVals && n == 0 {
				b.Log(id)
			}
			sink += int(id)

			slug, _, _, _ := jsonparser.Get(topic, "slug")
			if logVals && n == 0 {
				b.Log(string(slug))
			}
			sink += len(slug)
		}, "topics", "topics")
	}
	if sink == 0 {
		b.Log("")
	}
}
48 changes: 19 additions & 29 deletions examples/simdjson_example.go
Original file line number Diff line number Diff line change
@@ -1,42 +1,30 @@
package main

import (
"encoding/json"
"fmt"
"io/ioutil"
"log"

"github.com/minio/simdjson-go"
)

func printKey(iter simdjson.Iter, key string) (err error) {

obj, tmp, elem := &simdjson.Object{}, &simdjson.Iter{}, simdjson.Element{}

for {
typ := iter.Advance()

switch typ {
case simdjson.TypeRoot:
if typ, tmp, err = iter.Root(tmp); err != nil {
return
}

if typ == simdjson.TypeObject {
if obj, err = tmp.Object(obj); err != nil {
return
}

e := obj.FindKey(key, &elem)
if e != nil && elem.Type == simdjson.TypeString {
v, _ := elem.Iter.StringBytes()
fmt.Println(string(v))
}
}

default:
return
func printKeyHistogram(pj *simdjson.ParsedJson, key string) (err error) {
var elem *simdjson.Element
count := make(map[string]int)
err = pj.ForEach(func(i simdjson.Iter) error {
if elem, err = i.FindElement(elem, key); err != nil {
return nil
}
}
if elem.Type == simdjson.TypeString {
s, _ := elem.Iter.String()
count[s]++
}
return nil
})
res, _ := json.Marshal(count)
fmt.Println(key, ":", string(res)+"\n")
return err
}

func main() {
Expand All @@ -53,5 +41,7 @@ func main() {
log.Fatalf("Failed to parse JSON: %v", err)
}

printKey(parsed.Iter(), "Make")
printKeyHistogram(parsed, "Make")
printKeyHistogram(parsed, "MeterId")
printKeyHistogram(parsed, "ViolationCode")
}
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
module github.com/minio/simdjson-go

go 1.13
go 1.15

require (
github.com/buger/jsonparser v1.1.1
github.com/json-iterator/go v1.1.9
github.com/klauspost/compress v1.13.6
github.com/klauspost/cpuid/v2 v2.0.6
github.com/klauspost/cpuid/v2 v2.0.9
)
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand All @@ -6,8 +8,8 @@ github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGn
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
Expand Down
91 changes: 81 additions & 10 deletions ndjson_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,60 @@ func TestNdjsonCountWhere(t *testing.T) {
if err != nil {
t.Fatal(err)
}
ser := NewSerializer()
ser.CompressMode(CompressBest)
b := ser.Serialize(nil, *pj)
t.Log(len(b))

const want = 116
if result := countWhere("Make", "HOND", *pj); result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}
t.Run("countWhere", func(t *testing.T) {
if result := countWhere("Make", "HOND", *pj); result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}
})
t.Run("foreach", func(t *testing.T) {
var result int
var elem *Element
var obj *Object
err := pj.ForEach(func(i Iter) error {
var err error
obj, err = i.Object(obj)
if err == nil {
elem = obj.FindKey("Make", elem)
if elem != nil {
bts, _ := elem.Iter.StringBytes()
if string(bts) == "HOND" {
result++
}
}
}
return nil
})
if err != nil {
t.Fatal(err)
}
if result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}
})
t.Run("foreach-findelement", func(t *testing.T) {
var result int
var elem *Element
err := pj.ForEach(func(i Iter) error {
var err error
elem, err = i.FindElement(elem, "Make")
if err != nil {
return nil
}
bts, _ := elem.Iter.StringBytes()
if string(bts) == "HOND" {
result++
}
return nil
})
if err != nil {
t.Fatal(err)
}
if result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}
})
}

func TestNdjsonCountWhere2(t *testing.T) {
Expand All @@ -287,9 +333,34 @@ func TestNdjsonCountWhere2(t *testing.T) {
t.Fatal(err)
}
const want = 170315
if result := countWhere("subreddit", "reddit.com", *pj); result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}
t.Run("countWhere", func(t *testing.T) {
if result := countWhere("subreddit", "reddit.com", *pj); result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}

})
t.Run("foreach-findelement", func(t *testing.T) {
var result int
var elem *Element
err := pj.ForEach(func(i Iter) error {
var err error
elem, err = i.FindElement(elem, "subreddit")
if err != nil {
return nil
}
bts, _ := elem.Iter.StringBytes()
if string(bts) == "reddit.com" {
result++
}
return nil
})
if err != nil {
t.Fatal(err)
}
if result != want {
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
}
})
}

func loadFile(filename string) []byte {
Expand Down
4 changes: 2 additions & 2 deletions parsed_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ func (a *Array) Iter() Iter {
}

// ForEach calls the provided function for every element.
func (a *Array) ForEach(fn func(t Type, i Iter)) {
func (a *Array) ForEach(fn func(i Iter)) {
i := a.Iter()
for {
t := i.Advance()
if t == TypeNone {
break
}
fn(t, i)
fn(i)
}
return
}
Expand Down
28 changes: 24 additions & 4 deletions parsed_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,24 @@ func (pj *ParsedJson) stringByteAt(offset, length uint64) ([]byte, error) {
return pj.Strings.B[offset : offset+length], nil
}

// ForEach calls fn for each line in NDJSON, or for the top element in
// non-NDJSON input. The element passed to fn will usually be an object or an
// array.
// If fn returns a non-nil error, iteration stops and that error is returned.
// An error reported while advancing over the tape is returned as well.
func (pj *ParsedJson) ForEach(fn func(i Iter) error) error {
	var (
		root = Iter{tape: *pj}
		cur  Iter
	)
	for {
		typ, err := root.AdvanceIter(&cur)
		if err != nil {
			return err
		}
		if typ != TypeRoot {
			// Anything other than a root ends the iteration without error.
			return nil
		}
		// NOTE(review): presumably steps from the root wrapper onto the value
		// it contains; AdvanceInto is defined outside this view, so confirm.
		cur.AdvanceInto()
		if cbErr := fn(cur); cbErr != nil {
			return cbErr
		}
	}
}

// Clone returns a deep clone of the ParsedJson.
// If a nil destination is sent a new will be created.
func (pj *ParsedJson) Clone(dst *ParsedJson) *ParsedJson {
Expand Down Expand Up @@ -751,13 +769,15 @@ func (i *Iter) Root(dst *Iter) (Type, *Iter, error) {

// FindElement allows searching for fields and objects by path from the iter and forward,
// moving into root and objects, but not arrays.
// Separate each object name by /.
// For example `Image/Url` will search the current root/object for an "Image"
// For example "Image", "Url" will search the current root/object for an "Image"
// object and return the value of the "Url" element.
// ErrPathNotFound is returned if any part of the path cannot be found.
// If the tape contains an error it will be returned.
// The iter will *not* be advanced.
func (i *Iter) FindElement(path string, dst *Element) (*Element, error) {
func (i *Iter) FindElement(dst *Element, path ...string) (*Element, error) {
if len(path) == 0 {
return dst, ErrPathNotFound
}
// Local copy.
cp := *i
for {
Expand All @@ -768,7 +788,7 @@ func (i *Iter) FindElement(path string, dst *Element) (*Element, error) {
if err != nil {
return dst, err
}
return obj.FindPath(path, dst)
return obj.FindPath(dst, path...)
case TagRoot:
_, _, err := cp.Root(&cp)
if err != nil {
Expand Down
Loading

0 comments on commit 2455af9

Please sign in to comment.