From ef3c1ab4d53a59b7aa47186d184fe950ded8ad91 Mon Sep 17 00:00:00 2001 From: Ray Zhang Date: Mon, 3 Oct 2016 10:39:03 -0700 Subject: [PATCH] Adds the lazy seek Can make the query couple of 10 times faster. In order to debug a Presto query performance issue, I observed the seeking in Sahara-extra is expensive and sometimes even unnecessary. The best way to avoid the overhead and unnecessary calls of seeking is to do it only when the client really needs the data. After this changes, the same query in Presto able to run 30 times faster. Both Presto and S3 clients have added the similar changes too. Change-Id: I8586af0d481fd08d48620e699467280f7b93150a --- .../swift/snative/SwiftNativeInputStream.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/hadoop-swiftfs/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java b/hadoop-swiftfs/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java index 691ecd0..04fbfc9 100644 --- a/hadoop-swiftfs/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java +++ b/hadoop-swiftfs/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeInputStream.java @@ -89,6 +89,8 @@ class SwiftNativeInputStream extends FSInputStream { */ private long rangeOffset = 0; + private long nextReadPosition = 0; + public SwiftNativeInputStream(SwiftNativeFileSystemStore storeNative, FileSystem.Statistics statistics, Path path, long bufferSize) throws IOException { @@ -138,6 +140,7 @@ class SwiftNativeInputStream extends FSInputStream { verifyOpen(); int result = -1; try { + seekStream(); result = httpStream.read(); } catch (IOException e) { String msg = "IOException while reading " + path @@ -297,6 +300,13 @@ class SwiftNativeInputStream extends FSInputStream { */ @Override public synchronized void seek(long targetPos) throws IOException { + if (targetPos < 0) { + throw new IOException("Negative Seek offset not supported"); + } + nextReadPosition = targetPos; + } + + public synchronized void realSeek(long targetPos) throws IOException { if (targetPos < 0) { throw new IOException("Negative Seek offset not supported"); } @@ -344,6 +354,18 @@ class SwiftNativeInputStream extends FSInputStream { fillBuffer(targetPos); } + /** + * Lazy seek. + * @throws IOException + */ + private void seekStream() throws IOException { + if (httpStream != null && nextReadPosition == pos) { + // already at specified position + return; + } + realSeek(nextReadPosition); + } + /** * Fill the buffer from the target position * If the target position == current position, the