Fix race between adding job and registering

Gearman plugin had a race between adding jobs to the functionList and
registering jobs. When registering jobs the functionMap is cleared, when
adding a job the plugin checks if the job is in the function Map before
running it. If we happen to trigger registration of jobs when we get a
response from gearman with a job assignment then the functionMap can be
empty making us send a work fail instead of running the job.

To make things worse this jenkins worker would not send a subsequent
GET JOB and would live lock never doing any useful work.

Correct this by making the processing for gearman events synchronous in
the work loop. This ensures that we never try to clear the function map
and check against it at the same time via different threads. To make
this happen the handleSessionEvent() method puts all events on a thread
safe queue for synchronous processing. This has allowed us to simplify
the work() loop and basically do the following:

  while running:
    init()
    register()
    process one event
    run function if processed
    drive IO

This is much easier to reason about as we essentially only have
bookkeeping and the code for one thing at a time.

Change-Id: Id537710f6c8276a528ad78afd72c5a7c8e8a16ac
This commit is contained in:
Clark Boylan 2015-05-04 18:09:31 -07:00
parent 6de3cdd29b
commit 4f2f53f38a

View File

@ -36,8 +36,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Queue; import java.util.Queue;
import java.util.Set; import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.ConcurrentLinkedQueue;
import org.gearman.common.Constants; import org.gearman.common.Constants;
import org.gearman.common.GearmanException; import org.gearman.common.GearmanException;
@ -70,7 +70,7 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
IDLE, RUNNING, SHUTTINGDOWN IDLE, RUNNING, SHUTTINGDOWN
} }
private static final String DESCRIPION_PREFIX = "GearmanWorker"; private static final String DESCRIPION_PREFIX = "GearmanWorker";
private Queue<GearmanFunction> functionList = null; private ConcurrentLinkedQueue<GearmanSessionEvent> eventList = null;
private Selector ioAvailable = null; private Selector ioAvailable = null;
private static final org.slf4j.Logger LOG = LoggerFactory.getLogger( private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(
Constants.GEARMAN_WORKER_LOGGER_NAME); Constants.GEARMAN_WORKER_LOGGER_NAME);
@ -78,7 +78,6 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
private Map<String, FunctionDefinition> functionMap; private Map<String, FunctionDefinition> functionMap;
private State state; private State state;
private ExecutorService executorService; private ExecutorService executorService;
private Map<GearmanJobServerSession, GearmanTask> taskMap = null;
private GearmanJobServerSession session = null; private GearmanJobServerSession session = null;
private final GearmanJobServerIpConnectionFactory connFactory = new GearmanNIOJobServerConnectionFactory(); private final GearmanJobServerIpConnectionFactory connFactory = new GearmanNIOJobServerConnectionFactory();
private volatile boolean jobUniqueIdRequired = false; private volatile boolean jobUniqueIdRequired = false;
@ -183,12 +182,11 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
public MyGearmanWorkerImpl(ExecutorService executorService, public MyGearmanWorkerImpl(ExecutorService executorService,
AvailabilityMonitor availability) { AvailabilityMonitor availability) {
this.availability = availability; this.availability = availability;
functionList = new LinkedList<GearmanFunction>(); eventList = new ConcurrentLinkedQueue<GearmanSessionEvent>();
id = DESCRIPION_PREFIX + ":" + Thread.currentThread().getId(); id = DESCRIPION_PREFIX + ":" + Thread.currentThread().getId();
functionMap = new HashMap<String, FunctionDefinition>(); functionMap = new HashMap<String, FunctionDefinition>();
state = State.IDLE; state = State.IDLE;
this.executorService = executorService; this.executorService = executorService;
taskMap = new HashMap<GearmanJobServerSession, GearmanTask>();
functionRegistry = new FunctionRegistry(); functionRegistry = new FunctionRegistry();
try { try {
@ -251,18 +249,23 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
LOG.debug("---- Worker " + this + " registered function " + LOG.debug("---- Worker " + this + " registered function " +
factory.getFunctionName()); factory.getFunctionName());
} }
enqueueNoopEvent();
}
public void enqueueNoopEvent() {
// Simulate a NOOP packet which will kick off a GRAB_JOB cycle // Simulate a NOOP packet which will kick off a GRAB_JOB cycle
// if we're sleeping. If we get a real NOOP in the mean time, // If we get a real NOOP in the mean time, it should be fine
// it should be fine because GearmanJobServerSession ignores a // because GearmanJobServerSession ignores a NOOP if PRE_SLEEP
// NOOP if PRE_SLEEP is not on the stack. // is not on the stack.
GearmanPacket p = new GearmanPacketImpl(GearmanPacketMagic.RES, GearmanPacket p = new GearmanPacketImpl(GearmanPacketMagic.RES,
GearmanPacketType.NOOP, new byte[0]); GearmanPacketType.NOOP, new byte[0]);
GearmanSessionEvent event = new GearmanSessionEvent(p, session); GearmanSessionEvent event = new GearmanSessionEvent(p, session);
session.handleSessionEvent(event); enqueueEvent(event);
} }
public void work() { public void work() {
GearmanSessionEvent event = null;
GearmanFunction function = null;
LOG.info("---- Worker " + this + " starting work"); LOG.info("---- Worker " + this + " starting work");
if (!state.equals(State.IDLE)) { if (!state.equals(State.IDLE)) {
@ -271,7 +274,6 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
} }
state = State.RUNNING; state = State.RUNNING;
boolean grabJobSent = false;
while (isRunning()) { while (isRunning()) {
LOG.debug("---- Worker " + this + " top of run loop"); LOG.debug("---- Worker " + this + " top of run loop");
@ -279,12 +281,8 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
if (!session.isInitialized()) { if (!session.isInitialized()) {
LOG.debug("---- Worker " + this + " run loop reconnect"); LOG.debug("---- Worker " + this + " run loop reconnect");
reconnect(); reconnect();
grabJobSent = false; enqueueNoopEvent();
} // Restart loop to check we connected.
// if still disconnected, skip
if (!session.isInitialized()) {
LOG.debug("---- Worker " + this + " run loop not initialized");
continue; continue;
} }
@ -298,55 +296,41 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
continue; continue;
} }
if (!isRunning()) continue; if (!isRunning() || !session.isInitialized()) continue;
if (functionList.isEmpty()) { event = eventList.poll();
LOG.debug("---- Worker " + this + " run loop function list is empty while" + function = processSessionEvent(event);
" checking for initial grab job");
if (!grabJobSent) {
// send the initial GRAB_JOB on reconnection.
LOG.info("---- Worker " + this + " sending initial grab job");
try {
sendGrabJob(session);
} catch (InterruptedException e) {
LOG.warn("---- Worker " + this +
" interrupted while waiting for okay to send grab job", e);
continue;
}
grabJobSent = true;
try {
session.driveSessionIO();
} catch (IOException io) {
LOG.warn("---- Worker " + this + " receieved IOException while" +
" sending initial grab job", io);
session.closeSession();
continue;
}
}
}
LOG.debug("---- Worker " + this + " run loop finished initial grab job");
if (!isRunning()) continue; if (!isRunning() || !session.isInitialized()) continue;
if (functionList.isEmpty()) { //For the time being we will execute the jobs synchronously
LOG.debug("---- Worker " + this + " function list empty; selecting"); //in the future, I expect to change this.
int interestOps = SelectionKey.OP_READ; if (function != null) {
if (session.sessionHasDataToWrite()) { LOG.info("---- Worker " + this + " executing function");
interestOps |= SelectionKey.OP_WRITE; submitFunction(function);
} // Send another grab_job on the next loop
session.getSelectionKey().interestOps(interestOps); enqueueNoopEvent();
}
try {
ioAvailable.select(); if (!isRunning() || !session.isInitialized()) continue;
} catch (IOException io) {
LOG.warn("---- Worker " + this + " receieved IOException while" + // Run IO, select waiting for ability to read and/or write
" selecting for IO", io); // then read and/or write.
session.closeSession(); int interestOps = SelectionKey.OP_READ;
continue; if (session.sessionHasDataToWrite()) {
} interestOps |= SelectionKey.OP_WRITE;
}
session.getSelectionKey().interestOps(interestOps);
try {
ioAvailable.select();
} catch (IOException io) {
LOG.warn("---- Worker " + this + " receieved IOException while" +
" selecting for IO", io);
session.closeSession();
continue;
} }
LOG.debug("---- Worker " + this + " run loop finished selecting");
if (ioAvailable.selectedKeys().contains(session.getSelectionKey())) { if (ioAvailable.selectedKeys().contains(session.getSelectionKey())) {
LOG.debug("---- Worker " + this + " received input in run loop"); LOG.debug("---- Worker " + this + " received input in run loop");
if (!session.isInitialized()) { if (!session.isInitialized()) {
@ -363,19 +347,6 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
} }
} }
LOG.debug("---- Worker " + this + " run loop finished driving session io"); LOG.debug("---- Worker " + this + " run loop finished driving session io");
if (!isRunning()) continue;
//For the time being we will execute the jobs synchronously
//in the future, I expect to change this.
if (!functionList.isEmpty()) {
LOG.info("---- Worker " + this + " executing function");
GearmanFunction fun = functionList.remove();
submitFunction(fun);
// Send another grab_job on the next loop
grabJobSent = false;
}
LOG.debug("---- Worker " + this + " bottom of run loop");
} }
shutDownWorker(true); shutDownWorker(true);
@ -391,63 +362,71 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
new GrabJobEventHandler(s), new GrabJobEventHandler(s),
new GearmanPacketImpl(GearmanPacketMagic.REQ, new GearmanPacketImpl(GearmanPacketMagic.REQ,
getGrabJobPacketType(), new byte[0])); getGrabJobPacketType(), new byte[0]));
taskMap.put(s, grabJobTask);
s.submitTask(grabJobTask); s.submitTask(grabJobTask);
} }
public void handleSessionEvent(GearmanSessionEvent event) public void handleSessionEvent(GearmanSessionEvent event)
throws IllegalArgumentException, IllegalStateException { throws IllegalArgumentException, IllegalStateException {
GearmanPacket p = event.getPacket(); enqueueEvent(event);
GearmanJobServerSession s = event.getSession(); }
GearmanPacketType t = p.getPacketType();
LOG.debug("---- Worker " + this + " handling session event" + public void enqueueEvent(GearmanSessionEvent event) {
" ( Session = " + s + " Event = " + t + " )"); // Enqueue in a thread safe manner. Events will
switch (t) { // be pulled off and processed serially in this workers
case JOB_ASSIGN: // main thread.
//TODO Figure out what the right behavior is if JobUUIDRequired was false when we submitted but is now true eventList.add(event);
LOG.info("---- Worker " + this + " received job assignment"); }
taskMap.remove(s);
addNewJob(event); private GearmanFunction processSessionEvent(GearmanSessionEvent event)
break; throws IllegalArgumentException, IllegalStateException {
case JOB_ASSIGN_UNIQ: if (event != null) {
//TODO Figure out what the right behavior is if JobUUIDRequired was true when we submitted but is now false GearmanPacket p = event.getPacket();
LOG.info("---- Worker " + this + " received unique job assignment"); GearmanJobServerSession s = event.getSession();
taskMap.remove(s); GearmanPacketType t = p.getPacketType();
addNewJob(event); LOG.debug("---- Worker " + this + " handling session event" +
break; " ( Session = " + s + " Event = " + t + " )");
case NOOP: switch (t) {
taskMap.remove(s); case JOB_ASSIGN:
LOG.debug("---- Worker " + this + " sending grab job after wakeup"); //TODO Figure out what the right behavior is if JobUUIDRequired was false when we submitted but is now true
try { LOG.info("---- Worker " + this + " received job assignment");
sendGrabJob(s); return addNewJob(event);
} catch (InterruptedException e) { case JOB_ASSIGN_UNIQ:
LOG.warn("---- Worker " + this + " interrupted while waiting for okay to send " + //TODO Figure out what the right behavior is if JobUUIDRequired was true when we submitted but is now false
"grab job", e); LOG.info("---- Worker " + this + " received unique job assignment");
} return addNewJob(event);
break; case NOOP:
case NO_JOB: LOG.debug("---- Worker " + this + " sending grab job after wakeup");
// We didn't get a job, so allow other workers or try {
// Jenkins to schedule on this node. sendGrabJob(s);
availability.unlock(this); } catch (InterruptedException e) {
LOG.debug("---- Worker " + this + " sending pre sleep after no_job"); LOG.warn("---- Worker " + this + " interrupted while waiting for okay to send " +
GearmanTask preSleepTask = new GearmanTask(new GrabJobEventHandler(s), "grab job", e);
new GearmanPacketImpl(GearmanPacketMagic.REQ, }
GearmanPacketType.PRE_SLEEP, new byte[0])); break;
taskMap.put(s, preSleepTask); case NO_JOB:
s.submitTask(preSleepTask); // We didn't get a job, so allow other workers or
break; // Jenkins to schedule on this node.
case ECHO_RES: availability.unlock(this);
break; LOG.debug("---- Worker " + this + " sending pre sleep after no_job");
case OPTION_RES: GearmanTask preSleepTask = new GearmanTask(new GrabJobEventHandler(s),
break; new GearmanPacketImpl(GearmanPacketMagic.REQ,
case ERROR: GearmanPacketType.PRE_SLEEP, new byte[0]));
s.closeSession(); s.submitTask(preSleepTask);
break; break;
default: case ECHO_RES:
LOG.warn("---- Worker " + this + " received unknown packet type " + t + break;
" from session " + s + "; closing connection"); case OPTION_RES:
s.closeSession(); break;
case ERROR:
s.closeSession();
break;
default:
LOG.warn("---- Worker " + this + " received unknown packet type " + t +
" from session " + s + "; closing connection");
s.closeSession();
}
} }
return null;
} }
public boolean addServer(String host, int port) { public boolean addServer(String host, int port) {
@ -554,7 +533,7 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
return exceptions; return exceptions;
} }
private void addNewJob(GearmanSessionEvent event) { private GearmanFunction addNewJob(GearmanSessionEvent event) {
byte[] handle, data, functionNameBytes, unique; byte[] handle, data, functionNameBytes, unique;
GearmanPacket p = event.getPacket(); GearmanPacket p = event.getPacket();
String functionName; String functionName;
@ -572,6 +551,7 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
new GearmanPacketImpl(GearmanPacketMagic.REQ, new GearmanPacketImpl(GearmanPacketMagic.REQ,
GearmanPacketType.WORK_FAIL, handle)); GearmanPacketType.WORK_FAIL, handle));
session.submitTask(gsr); session.submitTask(gsr);
enqueueNoopEvent();
} else { } else {
GearmanFunction function = def.getFactory().getFunction(); GearmanFunction function = def.getFactory().getFunction();
function.setData(data); function.setData(data);
@ -580,8 +560,9 @@ public class MyGearmanWorkerImpl implements GearmanSessionEventHandler {
if (unique != null && unique.length > 0) { if (unique != null && unique.length > 0) {
function.setUniqueId(unique); function.setUniqueId(unique);
} }
functionList.add(function); return function;
} }
return null;
} }
private void submitFunction(GearmanFunction fun) { private void submitFunction(GearmanFunction fun) {