go: GREP middleware

This middleware adds support for grepping objects in-place using the GREP
method and the "e" query parameter.  "e" was chosen to match grep(1).  Other
grep parameters may be implemented later if needed.
It detects gzip and bzip2 files and decompresses them inline.
This is intended to allow backend tools to search log files.

Example:

    GREP /sda/123/account/container/object?e=SEARCH HTTP/1.1

Change-Id: I569a72f4fc0f8ab85b90e8677f6daff0e28e8a79
This commit is contained in:
Michael Barton
2015-08-26 19:42:34 +00:00
parent 4d40f493dd
commit 73558d1412
3 changed files with 294 additions and 1 deletions

101
go/middleware/grep.go Normal file
View File

@@ -0,0 +1,101 @@
// Copyright (c) 2015 Rackspace
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package middleware
import (
"bufio"
"compress/bzip2"
"compress/gzip"
"io"
"net/http"
"regexp"
)
// grepWriter is a minimal http.ResponseWriter that captures a downstream
// handler's response instead of sending it to a client: body bytes are
// forwarded to w, headers are collected in h, and the status code is recorded
// in status.
type grepWriter struct {
	w           io.Writer   // destination for the response body
	h           http.Header // headers set by the downstream handler
	status      int         // status code recorded from the downstream handler
	wroteHeader bool        // true once the status code has been committed
}

// Compile-time check that grepWriter satisfies http.ResponseWriter.
var _ http.ResponseWriter = (*grepWriter)(nil)

// Header returns the header map the downstream handler may populate. The map
// is created on first use so a grepWriter built without one does not hand out
// a nil map that would panic on Set.
func (g *grepWriter) Header() http.Header {
	if g.h == nil {
		g.h = make(http.Header)
	}
	return g.h
}

// Write forwards buf to the underlying writer. As with net/http's own
// ResponseWriter, the first Write commits a 200 status if WriteHeader has not
// been called yet.
func (g *grepWriter) Write(buf []byte) (int, error) {
	if !g.wroteHeader {
		g.WriteHeader(http.StatusOK)
	}
	return g.w.Write(buf)
}

// WriteHeader records the response status. Only the first call takes effect;
// later calls are ignored so that a handler cannot change the status after
// the body has started flowing.
func (g *grepWriter) WriteHeader(status int) {
	if g.wroteHeader {
		return
	}
	g.wroteHeader = true
	g.status = status
}
// GrepObject is an http middleware that searches objects line-by-line on the
// object server, similar to grep(1).
//
// Requests whose method is not GREP are passed to next untouched. For a GREP
// request the regular expression is taken from the "e" query parameter; a
// missing or invalid expression yields a 400. The object is fetched by
// replaying the request against next as a GET. If next answers with anything
// other than 200, that status and body are relayed as-is. Otherwise the body
// is transparently decompressed when it starts with a gzip or bzip2 magic
// number, and every line matching the expression is written to the client
// followed by a newline. Headers set by next are not forwarded.
func GrepObject(next http.Handler) http.Handler {
	return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
		if request.Method != "GREP" {
			next.ServeHTTP(writer, request)
			return
		}

		// Validate the expression before touching the downstream handler so a
		// bad request never triggers an object read.
		expr := request.URL.Query().Get("e")
		if expr == "" {
			writer.WriteHeader(http.StatusBadRequest)
			return
		}
		re, err := regexp.Compile(expr)
		if err != nil {
			writer.WriteHeader(http.StatusBadRequest)
			return
		}

		newRequest, err := http.NewRequest("GET", request.URL.String(), nil)
		if err != nil {
			writer.WriteHeader(http.StatusInternalServerError)
			return
		}
		newRequest.Header = request.Header

		pr, pw := io.Pipe()
		newWriter := &grepWriter{w: pw, h: make(http.Header), status: http.StatusOK}
		done := make(chan struct{})
		go func() {
			defer close(done)
			// Closing the write side signals EOF to the reader below.
			defer pw.Close()
			next.ServeHTTP(newWriter, newRequest)
		}()
		defer func() {
			// Closing the read side makes any pending or later downstream
			// write fail immediately; then wait for the downstream handler so
			// it is not still using the request's headers after we return.
			pr.Close()
			<-done
		}()

		// peek at response data first to make sure the downstream handler has set a status code
		br := bufio.NewReader(pr)
		magic, peekErr := br.Peek(4)

		if newWriter.status != http.StatusOK {
			writer.WriteHeader(newWriter.status)
			io.Copy(writer, br)
			return
		}

		// Pick the line source; peekErr is non-nil when the body is shorter
		// than four bytes, in which case it is scanned as plain text.
		var body io.Reader = br
		if peekErr == nil {
			switch {
			case magic[0] == 0x1f && magic[1] == 0x8b:
				gzr, err := gzip.NewReader(br)
				if err != nil {
					writer.WriteHeader(http.StatusInternalServerError)
					return
				}
				defer gzr.Close()
				body = gzr
			case magic[0] == 'B' && magic[1] == 'Z' && magic[2] == 'h' && magic[3] >= '1' && magic[3] <= '9':
				body = bzip2.NewReader(br)
			}
		}

		writer.WriteHeader(http.StatusOK)
		scanner := bufio.NewScanner(body)
		for scanner.Scan() {
			if line := scanner.Bytes(); re.Match(line) {
				writer.Write(line)
				writer.Write([]byte{'\n'})
			}
		}
		// A scan error (corrupt compressed data, or a line longer than
		// bufio.Scanner's token limit) simply ends the output here: the 200
		// status has already been sent, so it cannot be reported to the client.
	})
}

192
go/middleware/grep_test.go Normal file
View File

@@ -0,0 +1,192 @@
package middleware
import (
"io/ioutil"
"log"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/require"
)
// TestGrepObject checks that a GREP request returns only the lines matching
// the "e" expression and that headers set by the wrapped handler are not
// forwarded to the client.
func TestGrepObject(t *testing.T) {
	payload := []byte("THIS IS LINE 1\nTHIS IS LINE 2\nTHIS IS LINE 3\nTHIS IS LINE 20\n")
	backend := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("X-Hi", "There")
		w.WriteHeader(200)
		w.Write(payload)
	})
	server := httptest.NewServer(GrepObject(backend))
	defer server.Close()

	// grep issues a GREP request with the given query string and returns the
	// response together with its fully read body.
	grep := func(query string) (*http.Response, string) {
		grepReq, _ := http.NewRequest("GREP", server.URL+query, nil)
		resp, err := http.DefaultClient.Do(grepReq)
		if err != nil {
			log.Fatal(err)
		}
		body, err := ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			log.Fatal(err)
		}
		return resp, string(body)
	}

	// Every line contains "THIS", so the whole object comes back.
	resp, body := grep("?e=THIS")
	require.Equal(t, 200, resp.StatusCode)
	require.Equal(t, string(payload), body)

	// Only lines containing "2" are returned.
	resp, body = grep("?e=2")
	require.Equal(t, "", resp.Header.Get("X-Hi"))
	require.Equal(t, 200, resp.StatusCode)
	require.Equal(t, "THIS IS LINE 2\nTHIS IS LINE 20\n", body)
}
// TestGrepPassNonGrep checks that a non-GREP request is passed straight to
// the wrapped handler: a GET receives the full body and the handler's own
// headers.
func TestGrepPassNonGrep(t *testing.T) {
	payload := []byte("THIS IS LINE 1\nTHIS IS LINE 2\nTHIS IS LINE 3\nTHIS IS LINE 20\n")
	backend := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("X-Hi", "There")
		w.WriteHeader(200)
		w.Write(payload)
	})
	server := httptest.NewServer(GrepObject(backend))
	defer server.Close()

	// fetch performs one request and returns the response together with its
	// fully read body.
	fetch := func(method, url string) (*http.Response, string) {
		outgoing, _ := http.NewRequest(method, url, nil)
		resp, err := http.DefaultClient.Do(outgoing)
		if err != nil {
			log.Fatal(err)
		}
		body, err := ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			log.Fatal(err)
		}
		return resp, string(body)
	}

	// A GREP request goes through the middleware first.
	resp, body := fetch("GREP", server.URL+"?e=THIS")
	require.Equal(t, 200, resp.StatusCode)
	require.Equal(t, string(payload), body)

	// A plain GET must reach the wrapped handler unmodified.
	resp, body = fetch("GET", server.URL)
	require.Equal(t, "There", resp.Header.Get("X-Hi"))
	require.Equal(t, 200, resp.StatusCode)
	require.Equal(t, "THIS IS LINE 1\nTHIS IS LINE 2\nTHIS IS LINE 3\nTHIS IS LINE 20\n", body)
}
// TestGrepObject404 checks that a non-200 status from the wrapped handler is
// relayed to the client of a GREP request.
func TestGrepObject404(t *testing.T) {
	notFound := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(404)
	})
	server := httptest.NewServer(GrepObject(notFound))
	defer server.Close()

	grepReq, _ := http.NewRequest("GREP", server.URL+"?e=4", nil)
	resp, err := http.DefaultClient.Do(grepReq)
	if err != nil {
		log.Fatal(err)
	}
	resp.Body.Close()

	require.Equal(t, 404, resp.StatusCode)
}
// TestGrepObjectGZ checks that a gzip-compressed object is decompressed
// inline and that only the matching line is returned.
func TestGrepObjectGZ(t *testing.T) {
	// Pre-built gzip stream served verbatim by the wrapped handler.
	gzPayload := []byte{'\x1f', '\x8b', '\x08', '\x08', '\x1f', '\x14', '\xde', 'U', '\x02', '\xff',
		'd', 'a', 't', 'a', '\x00', '\x0b', '\xf1', '\xf0', '\x0c', 'V', '\x00', '"', '\x1f',
		'O', '?', 'W', '\x05', 'C', '\xae', '\x10', 'd', '\xae', '\x11', '*', '\xd7', '\x98',
		'\x0b', '\x00', '\x97', 'V', '\x04', '\xc7', '-', '\x00', '\x00', '\x00'}
	backend := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(200)
		w.Write(gzPayload)
	})
	server := httptest.NewServer(GrepObject(backend))
	defer server.Close()

	grepReq, _ := http.NewRequest("GREP", server.URL+"?e=2", nil)
	resp, err := http.DefaultClient.Do(grepReq)
	if err != nil {
		log.Fatal(err)
	}
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		log.Fatal(err)
	}

	require.Equal(t, 200, resp.StatusCode)
	require.Equal(t, "THIS IS LINE 2\n", string(body))
}
// TestGrepObjectBadGZ checks that an object carrying the gzip magic number
// but an invalid gzip header results in a 500.
func TestGrepObjectBadGZ(t *testing.T) {
	// Gzip magic followed by garbage.
	corrupt := []byte{'\x1f', '\x8b', 'X', 'X', 'X'}
	backend := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(200)
		w.Write(corrupt)
	})
	server := httptest.NewServer(GrepObject(backend))
	defer server.Close()

	grepReq, _ := http.NewRequest("GREP", server.URL+"?e=2", nil)
	resp, err := http.DefaultClient.Do(grepReq)
	if err != nil {
		log.Fatal(err)
	}
	resp.Body.Close()

	require.Equal(t, 500, resp.StatusCode)
}
// TestGrepObjectBZ2 checks that a bzip2-compressed object is decompressed
// inline and that only the matching line is returned.
func TestGrepObjectBZ2(t *testing.T) {
	// Pre-built bzip2 stream served verbatim by the wrapped handler.
	bzPayload := []byte{'B', 'Z', 'h', '9', '1', 'A', 'Y', '&', 'S', 'Y', '\x0c', '\x0e', '\x0b', '\x95',
		'\x00', '\x00', '\x15', '^', '\x00', '\x00', '\x10', '@', '\x00', '8', '\x00', '\x02', 'e',
		'\x0c', '\x00', ' ', '\x00', '!', '\xb5', 'C', '@', '\xf2', '\x10', '4', '\r', '\n', '\xd5',
		'l', 'h', '\xe0', '\xc8', 'A', '\x94', '\x88', '%', '\x0b', '\x17', 'r', 'E', '8', 'P', '\x90',
		'\x0c', '\x0e', '\x0b', '\x95'}
	backend := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(200)
		w.Write(bzPayload)
	})
	server := httptest.NewServer(GrepObject(backend))
	defer server.Close()

	grepReq, _ := http.NewRequest("GREP", server.URL+"?e=2", nil)
	resp, err := http.DefaultClient.Do(grepReq)
	if err != nil {
		log.Fatal(err)
	}
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		log.Fatal(err)
	}

	require.Equal(t, 200, resp.StatusCode)
	require.Equal(t, "THIS IS LINE 2\n", string(body))
}
// TestGrepObjectInvalidRegex checks that an "e" parameter that does not
// compile as a regular expression is rejected with a 400, regardless of what
// the wrapped handler would answer.
func TestGrepObjectInvalidRegex(t *testing.T) {
	notFound := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(404)
	})
	server := httptest.NewServer(GrepObject(notFound))
	defer server.Close()

	grepReq, _ := http.NewRequest("GREP", server.URL+"?e=([|", nil)
	resp, err := http.DefaultClient.Do(grepReq)
	if err != nil {
		log.Fatal(err)
	}
	resp.Body.Close()

	require.Equal(t, 400, resp.StatusCode)
}
// TestGrepObjectNoQuery checks that a GREP request without an "e" parameter
// is rejected with a 400, regardless of what the wrapped handler would
// answer.
func TestGrepObjectNoQuery(t *testing.T) {
	notFound := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(404)
	})
	server := httptest.NewServer(GrepObject(notFound))
	defer server.Close()

	grepReq, _ := http.NewRequest("GREP", server.URL, nil)
	resp, err := http.DefaultClient.Do(grepReq)
	if err != nil {
		log.Fatal(err)
	}
	resp.Body.Close()

	require.Equal(t, 400, resp.StatusCode)
}

View File

@@ -702,7 +702,7 @@ func (server *ObjectServer) GetHandler() http.Handler {
router.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, fmt.Sprintf("Invalid path: %s", r.URL.Path), http.StatusBadRequest)
})
return router
return alice.New(middleware.GrepObject).Then(router)
}
func GetServer(conf string, flags *flag.FlagSet) (bindIP string, bindPort int, serv hummingbird.Server, logger hummingbird.SysLogLike, err error) {