Skip to content

Commit d3f2f59

Browse files
Christopher TateAndroid (Google) Code Review
authored andcommitted
Merge "Fix edge cases leading to backup hanging forever"
2 parents 03cbb97 + 6de74ff commit d3f2f59

File tree

2 files changed

+98
-5
lines changed

2 files changed

+98
-5
lines changed

core/java/com/android/internal/backup/BackupConstants.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ public class BackupConstants {
2424
public static final int TRANSPORT_ERROR = 1;
2525
public static final int TRANSPORT_NOT_INITIALIZED = 2;
2626
public static final int AGENT_ERROR = 3;
27+
public static final int AGENT_UNKNOWN = 4;
2728
}

services/java/com/android/server/BackupManagerService.java

Lines changed: 97 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ public String toString() {
235235
volatile long mLastBackupPass;
236236
volatile long mNextBackupPass;
237237

238+
// For debugging, we maintain a progress trace of operations during backup
239+
static final boolean DEBUG_BACKUP_TRACE = true;
240+
final List<String> mBackupTrace = new ArrayList<String>();
241+
238242
// A similar synchronization mechanism around clearing apps' data for restore
239243
final Object mClearDataLock = new Object();
240244
volatile boolean mClearingData;
@@ -652,6 +656,23 @@ public void handleMessage(Message msg) {
652656
}
653657
}
654658

659+
// ----- Debug-only backup operation trace -----
660+
void addBackupTrace(String s) {
661+
if (DEBUG_BACKUP_TRACE) {
662+
synchronized (mBackupTrace) {
663+
mBackupTrace.add(s);
664+
}
665+
}
666+
}
667+
668+
void clearBackupTrace() {
669+
if (DEBUG_BACKUP_TRACE) {
670+
synchronized (mBackupTrace) {
671+
mBackupTrace.clear();
672+
}
673+
}
674+
}
675+
655676
// ----- Main service implementation -----
656677

657678
public BackupManagerService(Context context) {
@@ -1612,6 +1633,7 @@ IBackupAgent bindToAgentSynchronous(ApplicationInfo app, int mode) {
16121633
mAgentConnectLock.wait(5000);
16131634
} catch (InterruptedException e) {
16141635
// just bail
1636+
if (DEBUG) Slog.w(TAG, "Interrupted: " + e);
16151637
return null;
16161638
}
16171639
}
@@ -1621,6 +1643,7 @@ IBackupAgent bindToAgentSynchronous(ApplicationInfo app, int mode) {
16211643
Slog.w(TAG, "Timeout waiting for agent " + app);
16221644
return null;
16231645
}
1646+
if (DEBUG) Slog.i(TAG, "got agent " + mConnectedAgent);
16241647
agent = mConnectedAgent;
16251648
}
16261649
} catch (RemoteException e) {
@@ -1814,6 +1837,8 @@ public PerformBackupTask(IBackupTransport transport, ArrayList<BackupRequest> qu
18141837

18151838
mCurrentState = BackupState.INITIAL;
18161839
mFinished = false;
1840+
1841+
addBackupTrace("STATE => INITIAL");
18171842
}
18181843

18191844
// Main entry point: perform one chunk of work, updating the state as appropriate
@@ -1842,11 +1867,25 @@ public void execute() {
18421867
// We're starting a backup pass. Initialize the transport and send
18431868
// the PM metadata blob if we haven't already.
18441869
void beginBackup() {
1870+
if (DEBUG_BACKUP_TRACE) {
1871+
clearBackupTrace();
1872+
StringBuilder b = new StringBuilder(256);
1873+
b.append("beginBackup: [");
1874+
for (BackupRequest req : mOriginalQueue) {
1875+
b.append(' ');
1876+
b.append(req.packageName);
1877+
}
1878+
b.append(" ]");
1879+
addBackupTrace(b.toString());
1880+
}
1881+
18451882
mStatus = BackupConstants.TRANSPORT_OK;
18461883

18471884
// Sanity check: if the queue is empty we have no work to do.
18481885
if (mOriginalQueue.isEmpty()) {
18491886
Slog.w(TAG, "Backup begun with an empty queue - nothing to do.");
1887+
addBackupTrace("queue empty at begin");
1888+
executeNextState(BackupState.FINAL);
18501889
return;
18511890
}
18521891

@@ -1859,13 +1898,17 @@ void beginBackup() {
18591898

18601899
File pmState = new File(mStateDir, PACKAGE_MANAGER_SENTINEL);
18611900
try {
1862-
EventLog.writeEvent(EventLogTags.BACKUP_START, mTransport.transportDirName());
1901+
final String transportName = mTransport.transportDirName();
1902+
EventLog.writeEvent(EventLogTags.BACKUP_START, transportName);
18631903

18641904
// If we haven't stored package manager metadata yet, we must init the transport.
18651905
if (mStatus == BackupConstants.TRANSPORT_OK && pmState.length() <= 0) {
18661906
Slog.i(TAG, "Initializing (wiping) backup state and transport storage");
1907+
addBackupTrace("initializing transport " + transportName);
18671908
resetBackupState(mStateDir); // Just to make sure.
18681909
mStatus = mTransport.initializeDevice();
1910+
1911+
addBackupTrace("transport.initializeDevice() == " + mStatus);
18691912
if (mStatus == BackupConstants.TRANSPORT_OK) {
18701913
EventLog.writeEvent(EventLogTags.BACKUP_INITIALIZE);
18711914
} else {
@@ -1884,6 +1927,7 @@ void beginBackup() {
18841927
mPackageManager, allAgentPackages());
18851928
mStatus = invokeAgentForBackup(PACKAGE_MANAGER_SENTINEL,
18861929
IBackupAgent.Stub.asInterface(pmAgent.onBind()), mTransport);
1930+
addBackupTrace("PMBA invoke: " + mStatus);
18871931
}
18881932

18891933
if (mStatus == BackupConstants.TRANSPORT_NOT_INITIALIZED) {
@@ -1894,11 +1938,13 @@ void beginBackup() {
18941938
}
18951939
} catch (Exception e) {
18961940
Slog.e(TAG, "Error in backup thread", e);
1941+
addBackupTrace("Exception in backup thread: " + e);
18971942
mStatus = BackupConstants.TRANSPORT_ERROR;
18981943
} finally {
18991944
// If we've succeeded so far, invokeAgentForBackup() will have run the PM
19001945
// metadata and its completion/timeout callback will continue the state
19011946
// machine chain. If it failed that won't happen; we handle that now.
1947+
addBackupTrace("exiting prelim: " + mStatus);
19021948
if (mStatus != BackupConstants.TRANSPORT_OK) {
19031949
// if things went wrong at this point, we need to
19041950
// restage everything and try again later.
@@ -1912,11 +1958,12 @@ void beginBackup() {
19121958
// if that was warranted. Now we process the single next thing in the queue.
19131959
void invokeNextAgent() {
19141960
mStatus = BackupConstants.TRANSPORT_OK;
1961+
addBackupTrace("invoke q=" + mQueue.size());
19151962

19161963
// Sanity check that we have work to do. If not, skip to the end where
19171964
// we reestablish the wakelock invariants etc.
19181965
if (mQueue.isEmpty()) {
1919-
Slog.e(TAG, "Running queue but it's empty!");
1966+
if (DEBUG) Slog.i(TAG, "queue now empty");
19201967
executeNextState(BackupState.FINAL);
19211968
return;
19221969
}
@@ -1926,6 +1973,7 @@ void invokeNextAgent() {
19261973
mQueue.remove(0);
19271974

19281975
Slog.d(TAG, "starting agent for backup of " + request);
1976+
addBackupTrace("launch agent for " + request.packageName);
19291977

19301978
// Verify that the requested app exists; it might be something that
19311979
// requested a backup but was then uninstalled. The request was
@@ -1941,6 +1989,7 @@ void invokeNextAgent() {
19411989
mWakelock.setWorkSource(new WorkSource(mCurrentPackage.applicationInfo.uid));
19421990
agent = bindToAgentSynchronous(mCurrentPackage.applicationInfo,
19431991
IApplicationThread.BACKUP_MODE_INCREMENTAL);
1992+
addBackupTrace("agent bound; a? = " + (agent != null));
19441993
if (agent != null) {
19451994
mStatus = invokeAgentForBackup(request.packageName, agent, mTransport);
19461995
// at this point we'll either get a completion callback from the
@@ -1954,14 +2003,17 @@ void invokeNextAgent() {
19542003
// Try for the next one.
19552004
Slog.d(TAG, "error in bind/backup", ex);
19562005
mStatus = BackupConstants.AGENT_ERROR;
2006+
addBackupTrace("agent SE");
19572007
}
19582008
} catch (NameNotFoundException e) {
19592009
Slog.d(TAG, "Package does not exist; skipping");
2010+
addBackupTrace("no such package");
2011+
mStatus = BackupConstants.AGENT_UNKNOWN;
19602012
} finally {
19612013
mWakelock.setWorkSource(null);
19622014

19632015
// If there was an agent error, no timeout/completion handling will occur.
1964-
// That means we need to deal with the next state ourselves.
2016+
// That means we need to direct to the next state ourselves.
19652017
if (mStatus != BackupConstants.TRANSPORT_OK) {
19662018
BackupState nextState = BackupState.RUNNING_QUEUE;
19672019

@@ -1973,18 +2025,26 @@ void invokeNextAgent() {
19732025
dataChangedImpl(request.packageName);
19742026
mStatus = BackupConstants.TRANSPORT_OK;
19752027
if (mQueue.isEmpty()) nextState = BackupState.FINAL;
1976-
} else if (mStatus != BackupConstants.TRANSPORT_OK) {
2028+
} else if (mStatus == BackupConstants.AGENT_UNKNOWN) {
2029+
// Failed lookup of the app, so we couldn't bring up an agent, but
2030+
// we're otherwise fine. Just drop it and go on to the next as usual.
2031+
mStatus = BackupConstants.TRANSPORT_OK;
2032+
} else {
19772033
// Transport-level failure means we reenqueue everything
19782034
revertAndEndBackup();
19792035
nextState = BackupState.FINAL;
19802036
}
19812037

19822038
executeNextState(nextState);
2039+
} else {
2040+
addBackupTrace("expecting completion/timeout callback");
19832041
}
19842042
}
19852043
}
19862044

19872045
void finalizeBackup() {
2046+
addBackupTrace("finishing");
2047+
19882048
// Either backup was successful, in which case we of course do not need
19892049
// this pass's journal any more; or it failed, in which case we just
19902050
// re-enqueued all of these packages in the current active journal.
@@ -1997,6 +2057,7 @@ void finalizeBackup() {
19972057
// done a backup, we can now record what the current backup dataset token
19982058
// is.
19992059
if ((mCurrentToken == 0) && (mStatus == BackupConstants.TRANSPORT_OK)) {
2060+
addBackupTrace("success; recording token");
20002061
try {
20012062
mCurrentToken = mTransport.getCurrentRestoreSet();
20022063
} catch (RemoteException e) {} // can't happen
@@ -2012,11 +2073,13 @@ void finalizeBackup() {
20122073
// Make sure we back up everything and perform the one-time init
20132074
clearMetadata();
20142075
if (DEBUG) Slog.d(TAG, "Server requires init; rerunning");
2076+
addBackupTrace("init required; rerunning");
20152077
backupNow();
20162078
}
20172079
}
20182080

20192081
// Only once we're entirely finished do we release the wakelock
2082+
clearBackupTrace();
20202083
Slog.i(TAG, "Backup pass finished.");
20212084
mWakelock.release();
20222085
}
@@ -2031,7 +2094,8 @@ void clearMetadata() {
20312094
// handler in case it doesn't get back to us.
20322095
int invokeAgentForBackup(String packageName, IBackupAgent agent,
20332096
IBackupTransport transport) {
2034-
if (DEBUG) Slog.d(TAG, "processOneBackup doBackup() on " + packageName);
2097+
if (DEBUG) Slog.d(TAG, "invokeAgentForBackup on " + packageName);
2098+
addBackupTrace("invoking " + packageName);
20352099

20362100
mSavedStateName = new File(mStateDir, packageName);
20372101
mBackupDataName = new File(mDataDir, packageName + ".data");
@@ -2071,10 +2135,13 @@ int invokeAgentForBackup(String packageName, IBackupAgent agent,
20712135
ParcelFileDescriptor.MODE_TRUNCATE);
20722136

20732137
// Initiate the target's backup pass
2138+
addBackupTrace("setting timeout");
20742139
prepareOperationTimeout(token, TIMEOUT_BACKUP_INTERVAL, this);
2140+
addBackupTrace("calling agent doBackup()");
20752141
agent.doBackup(mSavedState, mBackupData, mNewState, token, mBackupManagerBinder);
20762142
} catch (Exception e) {
20772143
Slog.e(TAG, "Error invoking for backup on " + packageName);
2144+
addBackupTrace("exception: " + e);
20782145
EventLog.writeEvent(EventLogTags.BACKUP_AGENT_FAILURE, packageName,
20792146
e.toString());
20802147
agentErrorCleanup();
@@ -2085,6 +2152,7 @@ int invokeAgentForBackup(String packageName, IBackupAgent agent,
20852152
// either be a callback from the agent, at which point we'll process its data
20862153
// for transport, or a timeout. Either way the next phase will happen in
20872154
// response to the TimeoutHandler interface callbacks.
2155+
addBackupTrace("invoke success");
20882156
return BackupConstants.TRANSPORT_OK;
20892157
}
20902158

@@ -2096,6 +2164,7 @@ public void operationComplete() {
20962164
+ mCurrentPackage.packageName);
20972165
mBackupHandler.removeMessages(MSG_TIMEOUT);
20982166
clearAgentState();
2167+
addBackupTrace("operation complete");
20992168

21002169
ParcelFileDescriptor backupData = null;
21012170
mStatus = BackupConstants.TRANSPORT_OK;
@@ -2105,6 +2174,7 @@ public void operationComplete() {
21052174
if (mStatus == BackupConstants.TRANSPORT_OK) {
21062175
backupData = ParcelFileDescriptor.open(mBackupDataName,
21072176
ParcelFileDescriptor.MODE_READ_ONLY);
2177+
addBackupTrace("sending data to transport");
21082178
mStatus = mTransport.performBackup(mCurrentPackage, backupData);
21092179
}
21102180

@@ -2113,11 +2183,15 @@ public void operationComplete() {
21132183
// hold off on finishBackup() until the end, which implies holding off on
21142184
// renaming *all* the output state files (see below) until that happens.
21152185

2186+
addBackupTrace("data delivered: " + mStatus);
21162187
if (mStatus == BackupConstants.TRANSPORT_OK) {
2188+
addBackupTrace("finishing op on transport");
21172189
mStatus = mTransport.finishBackup();
2190+
addBackupTrace("finished: " + mStatus);
21182191
}
21192192
} else {
21202193
if (DEBUG) Slog.i(TAG, "no backup data written; not calling transport");
2194+
addBackupTrace("no data to send");
21212195
}
21222196

21232197
// After successful transport, delete the now-stale data
@@ -2165,12 +2239,14 @@ public void handleTimeout() {
21652239
Slog.e(TAG, "Timeout backing up " + mCurrentPackage.packageName);
21662240
EventLog.writeEvent(EventLogTags.BACKUP_AGENT_FAILURE, mCurrentPackage.packageName,
21672241
"timeout");
2242+
addBackupTrace("timeout of " + mCurrentPackage.packageName);
21682243
agentErrorCleanup();
21692244
dataChangedImpl(mCurrentPackage.packageName);
21702245
}
21712246

21722247
void revertAndEndBackup() {
21732248
if (MORE_DEBUG) Slog.i(TAG, "Reverting backup queue - restaging everything");
2249+
addBackupTrace("transport error; reverting");
21742250
for (BackupRequest request : mOriginalQueue) {
21752251
dataChangedImpl(request.packageName);
21762252
}
@@ -2199,13 +2275,15 @@ void clearAgentState() {
21992275

22002276
// If this was a pseudopackage there's no associated Activity Manager state
22012277
if (mCurrentPackage.applicationInfo != null) {
2278+
addBackupTrace("unbinding " + mCurrentPackage.packageName);
22022279
try { // unbind even on timeout, just in case
22032280
mActivityManager.unbindBackupAgent(mCurrentPackage.applicationInfo);
22042281
} catch (RemoteException e) {}
22052282
}
22062283
}
22072284

22082285
void restartBackupAlarm() {
2286+
addBackupTrace("setting backup trigger");
22092287
synchronized (mQueueLock) {
22102288
try {
22112289
startBackupAlarmsLocked(mTransport.requestBackupTime());
@@ -2216,6 +2294,7 @@ void restartBackupAlarm() {
22162294
void executeNextState(BackupState nextState) {
22172295
if (MORE_DEBUG) Slog.i(TAG, " => executing next step on "
22182296
+ this + " nextState=" + nextState);
2297+
addBackupTrace("executeNextState => " + nextState);
22192298
mCurrentState = nextState;
22202299
Message msg = mBackupHandler.obtainMessage(MSG_BACKUP_RESTORE_STEP, this);
22212300
mBackupHandler.sendMessage(msg);
@@ -4715,6 +4794,8 @@ private void dataChangedImpl(String packageName, HashSet<ApplicationInfo> target
47154794
// one already there, then overwrite it, but no harm done.
47164795
BackupRequest req = new BackupRequest(packageName);
47174796
if (mPendingBackups.put(app.packageName, req) == null) {
4797+
if (DEBUG) Slog.d(TAG, "Now staging backup of " + packageName);
4798+
47184799
// Journal this request in case of crash. The put()
47194800
// operation returned null when this package was not already
47204801
// in the set; we want to avoid touching the disk redundantly.
@@ -5736,6 +5817,17 @@ private void dumpInternal(PrintWriter pw) {
57365817
pw.println(" " + s);
57375818
}
57385819

5820+
if (DEBUG_BACKUP_TRACE) {
5821+
synchronized (mBackupTrace) {
5822+
if (!mBackupTrace.isEmpty()) {
5823+
pw.println("Most recent backup trace:");
5824+
for (String s : mBackupTrace) {
5825+
pw.println(" " + s);
5826+
}
5827+
}
5828+
}
5829+
}
5830+
57395831
int N = mBackupParticipants.size();
57405832
pw.println("Participants:");
57415833
for (int i=0; i<N; i++) {

0 commit comments

Comments
 (0)