diff --git a/AndroidSMSBackupRestoreCleaner.iml b/AndroidSMSBackupRestoreCleaner.iml new file mode 100644 index 0000000..5b1faca --- /dev/null +++ b/AndroidSMSBackupRestoreCleaner.iml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 4a65124..c389ccb 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,49 @@ Emoji needs the `lxml` package. Use `pip install lxml` to install it first, then * Python 2 only. (For now?) * ~~No emoji support~~ Now supports emoji! See above. * No MMS duplicate filtering. It just maintains the MMS entries as is. + +# DB info + +``` + +CREATE TABLE smss(protocol text, address text, date text, type text, subject text, body text, toa text, sc_toa text, service_center text, read text, status text, locked text, date_sent text, readable_date text, contact_name text, primary key (address, date)); + +CREATE TABLE mmss(text_only text, ct_t text, + using_mode text, msg_box text, secret_mode text, + v text, retr_txt_cs text, ct_cls text, favorite text, + d_rpt_st text, deletable text, st text, sim_imsi text, + creator text, tr_id text, sim_slot text, + read text, m_id text, callback_set text, m_type text, retr_txt text, + locked text, resp_txt text, rr_st text, safe_message text, + retr_st text, reserved text, msg_id text, hidden text, + sub text, rr text, seen text, ct_l text, from_address text, + m_size text, exp text, sub_cs text, sub_id text, resp_st text, + date text, app_id text, date_sent text, pri text, address text, + read_status text, d_tm text, d_rpt text, device_name text, + spam_report text, rpt_a text, m_cls text, readable_date text, + contact_name text, network_type text, privacy_mode text, + id integer, + primary key (address, date)); + +CREATE TABLE parts(seq text, +ct text, +name text, +chset text, +cd text, +fn text, +cid text, +cl text, +ctt_s text, +ctt_t text, +text text, +data text, +fk_id_mms integer, +primary key (data, text, name)); + +CREATE TABLE addrs(address 
text, +type text, +charset text, +fk_id_mms integer, +primary key (fk_id_mms, charset, type, address)); + +``` \ No newline at end of file diff --git a/clean.py b/clean.py index abfeda2..01e008f 100755 --- a/clean.py +++ b/clean.py @@ -12,7 +12,12 @@ import logging as log from datetime import datetime import argparse -import glob, fnmatch, os +import glob, fnmatch, os, re + +# TODO SMSCOUNT backup_set backup_date +# + +GLOB_ID_MMS = 0 def main(input_paths, output_path): @@ -21,7 +26,10 @@ def main(input_paths, output_path): log.basicConfig(level=log.DEBUG, format='%(asctime)s %(message)s') log.debug('Starting Operation...') conn = sqlite3.connect('sms.db') - conn.execute('DELETE FROM messages') + conn.execute('DELETE FROM smss') + conn.execute("DELETE FROM mmss") + conn.execute("DELETE FROM parts") + conn.execute("DELETE FROM addrs") mms_list = [] root = XML.Element("smses") @@ -29,11 +37,12 @@ def main(input_paths, output_path): log.debug("Parsing XML file: ") tree = XML.parse(xml_filename, parser=custom_parser) log.debug("Done.") - mms_list = mms_list + load_into_db(conn, tree) + load_into_db(conn, tree) add_sms(conn, root) + add_mms(conn, root) conn.close() - append_mms(mms_list, root) + #append_mms(mms_list, root) write_file(output_path, root) time_end = datetime.now() log.debug('Operation completed in %d seconds.' 
def _set_row_attributes(element, cursor, row, skip=(), null_as_text=()):
    """Copy one SELECT result row onto ``element`` as XML attributes.

    Attribute names are the column names reported by ``cursor.description``,
    so attributes are written in the column order of the query.

    :param element: XML element that receives the attributes.
    :param cursor: cursor the row was fetched from (source of column names).
    :param row: one row fetched from that cursor.
    :param skip: column names that must not be written as attributes.
    :param null_as_text: column names whose NULL is written as the string
        ``"null"``; a NULL in any other column leaves the attribute out.
    """
    for column, value in zip(cursor.description, row):
        name = column[0]
        if name in skip:
            continue
        if value is None:
            if name not in null_as_text:
                # Passing None to Element.set() is not serialisable, so an
                # attribute the backup never had is simply not emitted.
                continue
            value = "null"
        element.set(name, value)


def add_mms(conn, root):
    """Append every stored MMS, with its parts and addresses, to ``root``.

    Each row of ``mmss`` becomes an ``<mms>`` child of ``root`` (ordered by
    ``date_sent``) carrying the row's non-NULL columns as attributes. Rows of
    ``parts`` and ``addrs`` whose ``fk_id_mms`` equals the message's ``id``
    are nested below it as ``<parts>/<part>`` and ``<addrs>/<addr>``. The
    bookkeeping columns ``id`` and ``fk_id_mms`` are never written out.

    :param conn: open sqlite3 connection holding the ``mmss``, ``parts`` and
        ``addrs`` tables.
    :param root: element that receives the ``<mms>`` children; its ``count``
        attribute is increased by the number of MMS rows.
    """
    log.debug("Rewriting into optimized XML...")
    mms_count = conn.execute("SELECT COUNT(*) FROM mmss").fetchone()[0]

    # Add to the count already present on the root instead of overwriting it,
    # otherwise the SMS total written earlier is lost.
    # NOTE(review): assumes "count" is meant to be the total number of
    # messages in the file (SMS + MMS) -- confirm against the backup format.
    try:
        previous_count = int(root.get("count") or 0)
    except (TypeError, ValueError):
        previous_count = 0
    root.set("count", "%d" % (previous_count + mms_count))
    log.debug("Attempting to write new XML for MMS count: " + str(mms_count))

    mms_cursor = conn.execute("SELECT * FROM mmss ORDER BY date_sent")
    id_index = [column[0] for column in mms_cursor.description].index("id")
    for mms_row in mms_cursor:
        mms_element = XML.SubElement(root, "mms")
        _set_row_attributes(mms_element, mms_cursor, mms_row, skip=("id",))
        id_mms = mms_row[id_index]

        # The child rows are read through the same connection (no second
        # connection to a hard-coded file) with a bound parameter rather than
        # string concatenation.
        parts_root = XML.SubElement(mms_element, "parts")
        part_cursor = conn.execute(
            "SELECT * FROM parts WHERE fk_id_mms = ?", (id_mms,))
        for part_row in part_cursor:
            part_element = XML.SubElement(parts_root, "part")
            # A part holds either text or data; the missing one is written
            # as the literal string "null".
            _set_row_attributes(part_element, part_cursor, part_row,
                                skip=("fk_id_mms",),
                                null_as_text=("text", "data"))

        addrs_root = XML.SubElement(mms_element, "addrs")
        addr_cursor = conn.execute(
            "SELECT * FROM addrs WHERE fk_id_mms = ?", (id_mms,))
        for addr_row in addr_cursor:
            addr_element = XML.SubElement(addrs_root, "addr")
            _set_row_attributes(addr_element, addr_cursor, addr_row,
                                skip=("fk_id_mms",))
    conn.commit()
def insert_default(conn, sql, vals, child):
    """Run one INSERT statement and report whether the row was stored.

    :param conn: open sqlite3 connection.
    :param sql: INSERT statement using ``?`` placeholders.
    :param vals: sequence of values bound to the placeholders.
    :param child: XML element the values came from (used in log output only).
    :returns: True when the row was inserted; False when it violated a
        constraint (a duplicate) or the statement could not be executed.
    """
    try:
        conn.execute(sql, vals)
    except sqlite3.IntegrityError:
        # Primary-key clash: this entry is a duplicate, filter it out quietly.
        return False
    except sqlite3.OperationalError as e:
        log.info("Skipping: Found OperationalError when processing child (%s): %s" % (child.tag, str(child)))
        # str(e) instead of e.message: the attribute does not exist on Python 3.
        log.info("\tException: " + str(e))
        return False
    return True


def _insert_attributes(conn, table, element, leading=()):
    """Insert ``element``'s attributes as one row of ``table``.

    Column names are the attribute names; ``leading`` is a sequence of
    ``(column, value)`` pairs placed in front of them (e.g. the foreign key).
    Names and values are taken from the same list so they cannot drift apart.
    """
    names = list(element.attrib.keys())
    columns = [column for column, _ in leading] + names
    vals = [value for _, value in leading] + [element.attrib[n] for n in names]
    sql = 'INSERT INTO {} ({}) VALUES ({})'.format(
        table, ', '.join(columns), ', '.join('?' * len(columns)))
    return insert_default(conn, sql, vals, element)


def insert_sms(conn, child):
    """Store one ``<sms>`` element in ``smss``; return True if it was new."""
    return _insert_attributes(conn, 'smss', child)


def mms_compatibility(child, columns):
    """Build the ``mmss`` INSERT statement, leaving out ``oppo_*`` columns.

    :param child: the ``<mms>`` element (unused; kept for the existing
        call signature).
    :param columns: comma-separated column names.
    :returns: INSERT statement with one ``?`` placeholder per kept column.
        The caller must bind values for exactly those columns.
    """
    kept = [name.strip() for name in columns.split(',')]
    # Filtering by name (not by counting the substring "oppo") also handles an
    # oppo_ column in last position and keeps the placeholder count exact.
    kept = [name for name in kept if name and not name.startswith('oppo_')]
    log.debug("columns %s" % ', '.join(kept))
    placeholders = ', '.join('?' * len(kept))
    sql = 'INSERT INTO mmss ({}) VALUES ({})'.format(', '.join(kept), placeholders)
    return sql


def insert_mms(conn, child):
    """Store one ``<mms>`` element together with its parts and addresses.

    The message gets the next value of the module-level ``GLOB_ID_MMS``
    counter as its ``id``; its ``<part>`` and ``<addr>`` children are stored
    with that id as ``fk_id_mms``. Children are only stored when the message
    row itself was inserted.

    :returns: True when the message was inserted, False when it was skipped
        (duplicate or failed statement), matching the other ``insert_*``
        helpers so the caller can count skipped entries.
    """
    global GLOB_ID_MMS
    id_mms = GLOB_ID_MMS
    # The counter advances even when the insert is rejected; ids only need to
    # be unique, not contiguous.
    GLOB_ID_MMS = GLOB_ID_MMS + 1

    # Drop the oppo_* attributes from names AND values so the number of bound
    # values always equals the number of placeholders.
    names = [n for n in child.attrib.keys() if not n.startswith('oppo_')]
    sql = mms_compatibility(child, ', '.join(['id'] + names))
    vals = [id_mms] + [child.attrib[n] for n in names]
    if not insert_default(conn, sql, vals, child):
        return False

    # ---------- PARTS AND ADDRS ---------------
    child_parts = child.find('parts')
    if child_parts is not None:
        for child_part in child_parts.findall('part'):
            insert_part(conn, id_mms, child_part)
    child_addrs = child.find('addrs')
    if child_addrs is not None:
        for child_addr in child_addrs.findall('addr'):
            insert_addr(conn, id_mms, child_addr)
    return True


def insert_part(conn, id_mms, child_mms):
    """Store one ``<part>`` element in ``parts`` linked to MMS ``id_mms``.

    NOTE(review): the README schema keys ``parts`` on (data, text, name)
    without ``fk_id_mms``, so an identical part of a different message would
    be rejected as a duplicate -- confirm this is intended.
    """
    return _insert_attributes(conn, 'parts', child_mms,
                              leading=[('fk_id_mms', id_mms)])


def insert_addr(conn, id_mms, child_mms):
    """Store one ``<addr>`` element in ``addrs`` linked to MMS ``id_mms``."""
    return _insert_attributes(conn, 'addrs', child_mms,
                              leading=[('fk_id_mms', id_mms)])


def load_into_db(conn, tree):
    """Load every ``<sms>`` and ``<mms>`` child of the tree root into the DB.

    Entries rejected by the insert helpers (duplicates, failed statements)
    and children with any other tag are counted as skipped. The tree root is
    cleared afterwards and the transaction is committed.

    :param conn: open sqlite3 connection with the smss/mmss/parts/addrs tables.
    :param tree: parsed backup document (object providing ``getroot()``).
    :returns: always an empty list; kept for callers written against the
        earlier version that returned the untouched MMS elements.
    """
    root = tree.getroot()
    log.debug("Loading MMS data into DB...")
    num_skipped = 0
    mms_list = []
    for child in root:
        if child.tag == "mms":
            stored = insert_mms(conn, child)
        elif child.tag == "sms":
            stored = insert_sms(conn, child)
        else:
            stored = False
        if not stored:
            num_skipped += 1

    root.clear()  # Clear this super huge tree. We don't need it anymore
    log.debug("Done duplicate check. Skipped duplicate entries: " + str(num_skipped))
    conn.commit()
    return mms_list