Commit 01e5b56
Use zip in backup (#268)
* Refactor auto backup.
* Backup to .zip file
* Look for any .json file in the .zip file
* Add github action to check if the backup file was created.
* Create github action to check backup.
* Test github action.
* Add logs.
* Add backup flag validation.
* Import no_backup var.
* Import no_backup var from config file.
* Enable tmate for debugging.
* Force error.
* Refactor.
* Remove tmate.
* Remove no backup variable from config.
* Improve messages.
* Check backup file size.
* Update slack channel.
1 parent eb9021e commit 01e5b56

3 files changed: +213 −23 lines changed

.github/scripts/check_backup.py

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+"""
+This script checks if a backup file for the current date exists in a specified S3 bucket.
+If the backup file does not exist or is smaller than 1MB, a notification is sent to a Slack channel.
+
+Expected file format in the S3 bucket:
+- The file should be in the folder 'db_backup/' with the following naming pattern:
+  'smartapi_YYYYMMDD.zip', where YYYYMMDD corresponds to the current date.
+
+Required Environment Variables:
+- AWS_ACCESS_KEY_ID: The AWS access key ID to read the AWS S3 bucket.
+- AWS_SECRET_ACCESS_KEY: The AWS secret access key to read the AWS S3 bucket.
+- BACKUP_BUCKET_NAME: The name of the AWS S3 bucket where backups are stored.
+- S3_FOLDER: The folder path within the S3 bucket where backups are stored (e.g., 'db_backup/').
+- AWS_REGION: The AWS region where the S3 bucket is located.
+- SLACK_CHANNEL: The Slack channel where notifications should be sent (e.g., '#observability-test').
+- SLACK_WEBHOOK_URL: The Slack Webhook URL used to send the notification.
+
+Functionality:
+1. The script uses the AWS SDK (boto3) to check for the existence of the backup file in the specified S3 bucket.
+2. If the file is found and is larger than 1MB, it logs that no action is needed.
+3. If the file is not found, or is 1MB or smaller, it sends a notification to the configured Slack channel.
+
+Dependencies:
+- boto3: For interacting with AWS S3.
+- requests: For sending HTTP POST requests to Slack.
+
+"""
+
+import boto3
+import botocore
+import os
+import requests
+
+from datetime import datetime
+
+
+def send_slack_notification(message):
+
+    print(f" └─ {message}")
+
+    # Create the payload for Slack
+    slack_data = {
+        "channel": os.getenv("SLACK_CHANNEL"),
+        "username": "SmartAPI",
+        "icon_emoji": ":thumbsdown:",
+        "text": message,
+    }
+
+    try:
+        print(" └─ Sending Slack notification.")
+        response = requests.post(os.getenv("SLACK_WEBHOOK_URL"), json=slack_data, timeout=10)
+        if response.status_code == 200:
+            print(" └─ Slack notification sent successfully.")
+        else:
+            print(f" └─ Failed to send message to Slack: {response.status_code}, {response.text}")
+    except requests.exceptions.Timeout as e:
+        print(" └─ Request timed out to Slack WebHook URL.")
+        raise e
+    except requests.exceptions.RequestException as e:
+        print(f" └─ Failed to send Slack notification. Error: {str(e)}")
+        raise e
+
+
+def check_backup_file():
+
+    # Create the expected file name
+    today_date = datetime.today().strftime("%Y%m%d")
+    expected_file = f"{os.getenv('S3_FOLDER')}smartapi_{today_date}.zip"
+
+    # Create the S3 client
+    s3_client = boto3.client("s3", region_name=os.getenv("AWS_REGION"))
+
+    # Try to fetch the file metadata
+    try:
+        response = s3_client.head_object(Bucket=os.getenv("BACKUP_BUCKET_NAME"), Key=expected_file)
+        print(f" └─ Backup file {expected_file} exists!")
+
+        # Get the file size in bytes
+        file_size = response['ContentLength']
+
+        # Check if the file is larger than 1MB
+        if file_size > 1048576:  # 1MB in bytes
+            print(f" └─ Backup file is larger than 1MB! Size: {file_size} bytes.")
+            print(" └─ Nothing to do!")
+        else:
+            message = f":alert: The backup file {expected_file} is smaller than 1MB!"
+            send_slack_notification(message)
+
+    except botocore.exceptions.ClientError as e:
+        print(e)
+        message = f":alert: The backup file {expected_file} was NOT created today!"
+        send_slack_notification(message)
+
+
+if __name__ == "__main__":
+    check_backup_file()
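For a quick local sanity check, the script can also be run by hand with the same environment variables the workflow injects. A minimal sketch, assuming the repository root as working directory and AWS credentials coming from the usual credential chain; every value below is a placeholder, not a real secret:

import os
import sys

# Make the script importable from its location in the repo.
sys.path.insert(0, ".github/scripts")

# Placeholder values only; the real workflow injects these from repository secrets.
os.environ["AWS_REGION"] = "us-west-2"
os.environ["BACKUP_BUCKET_NAME"] = "my-backup-bucket"
os.environ["S3_FOLDER"] = "db_backup/"
os.environ["SLACK_CHANNEL"] = "#observability-test"
os.environ["SLACK_WEBHOOK_URL"] = "https://hooks.slack.com/services/PLACEHOLDER"

from check_backup import check_backup_file

# Run the check once; it prints the result and notifies Slack on failure.
check_backup_file()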

.github/workflows/check_backup.yml

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+name: Check S3 Backup and Notify Slack
+
+on:
+  workflow_dispatch: # Allows manual trigger from the GitHub Actions UI
+  schedule:
+    - cron: '0 13 * * *' # 5:00 AM PST (UTC-8)
+
+jobs:
+  check-backup:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies (boto3 and requests)
+        run: |
+          python -m pip install --upgrade pip
+          pip install boto3 requests
+
+      - name: Check if backup exists in S3
+        run: python .github/scripts/check_backup.py
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_REGION: ${{ secrets.AWS_REGION }}
+          BACKUP_BUCKET_NAME: "${{ secrets.BACKUP_BUCKET_NAME }}"
+          S3_FOLDER: "db_backup/"
+          SLACK_CHANNEL: "#ncats-translator"
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
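The job only asserts that today's smartapi_YYYYMMDD.zip exists and is large enough; when it fails, a short boto3 listing of the db_backup/ prefix is a handy companion for seeing what did get uploaded. A sketch with a hypothetical bucket and region, using credentials from the standard AWS chain:

import boto3

# Hypothetical bucket and region; adjust to the real backup bucket.
s3 = boto3.client("s3", region_name="us-west-2")
resp = s3.list_objects_v2(Bucket="my-backup-bucket", Prefix="db_backup/")

# Print the five most recent backups with their sizes in bytes.
recent = sorted(resp.get("Contents", []), key=lambda o: o["LastModified"], reverse=True)
for obj in recent[:5]:
    print(obj["Key"], obj["Size"], obj["LastModified"])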

src/admin.py

Lines changed: 82 additions & 23 deletions
@@ -26,6 +26,8 @@
 import logging
 import random
 import time
+import zipfile
+import io
 from datetime import datetime

 import boto3
@@ -37,20 +39,48 @@
 logging.basicConfig(level="INFO")


-def _default_filename():
-    return "smartapi_" + datetime.today().strftime("%Y%m%d") + ".json"
+def _default_filename(extension=".json"):
+    return "smartapi_" + datetime.today().strftime("%Y%m%d") + extension


-def save_to_file(mapping, filename=None):
-    filename = filename or _default_filename()
-    with open(filename, "w") as file:
-        json.dump(mapping, file, indent=2)
-
-
-def save_to_s3(mapping, filename=None, bucket="smartapi"):
-    filename = filename or _default_filename()
+def save_to_file(mapping, filename=None, format="zip"):
+    """
+    Save data to a file in either JSON or ZIP format.
+    :param mapping: Data to save
+    :param filename: File name
+    :param format: File format, either 'json' or 'zip'
+    """
+    if format == "zip":
+        filename = filename or _default_filename(".zip")
+        with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zfile:
+            json_data = json.dumps(mapping, indent=2)
+            zfile.writestr(filename.replace(".zip", ".json"), json_data)
+    else:
+        filename = filename or _default_filename(".json")
+        with open(filename, "w") as file:
+            json.dump(mapping, file, indent=2)
+
+
+def save_to_s3(data, filename=None, bucket="smartapi", format="zip"):
+    """
+    Save data to S3 in either JSON or ZIP format.
+    :param data: Data to save
+    :param filename: File name
+    :param bucket: S3 bucket name
+    :param format: File format, either 'json' or 'zip'
+    """
+    filename = filename or _default_filename(f".{format}")
     s3 = boto3.resource("s3")
-    s3.Bucket(bucket).put_object(Key="db_backup/{}".format(filename), Body=json.dumps(mapping, indent=2))
+
+    if format == "zip":
+        with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zfile:
+            json_data = json.dumps(data, indent=2)
+            zfile.writestr(filename.replace(".zip", ".json"), json_data)
+        logging.info(f"Uploading {filename} to AWS S3")
+        s3.Bucket(bucket).upload_file(Filename=filename, Key=f"db_backup/{filename}")
+    else:
+        logging.info(f"Uploading {filename} to AWS S3")
+        s3.Bucket(bucket).put_object(Key=f"db_backup/{filename}", Body=json.dumps(data, indent=2))


 def _backup():
@@ -69,14 +99,14 @@ def _backup():
     return smartapis


-def backup_to_file(filename=None):
+def backup_to_file(filename=None, format="zip"):
     smartapis = _backup()
-    save_to_file(smartapis, filename)
+    save_to_file(smartapis, filename, format)


-def backup_to_s3(filename=None, bucket="smartapi"):
+def backup_to_s3(filename=None, bucket="smartapi", format="zip"):
     smartapis = _backup()
-    save_to_s3(smartapis, filename, bucket)
+    save_to_s3(smartapis, filename, bucket, format)


 def _restore(smartapis):
@@ -99,7 +129,7 @@ def restore_from_s3(filename=None, bucket="smartapi"):
     s3 = boto3.client("s3")

     if not filename:
-        objects = s3.list_objects_v2(Bucket="smartapi", Prefix="db_backup")["Contents"]
+        objects = s3.list_objects_v2(Bucket=bucket, Prefix="db_backup")["Contents"]
         filename = max(objects, key=lambda x: x["LastModified"])["Key"]

     if not filename.startswith("db_backup/"):
@@ -108,14 +138,42 @@ def restore_from_s3(filename=None, bucket="smartapi"):
     logging.info("GET s3://%s/%s", bucket, filename)

     obj = s3.get_object(Bucket=bucket, Key=filename)
-    smartapis = json.loads(obj["Body"].read())
+
+    filename = filename.replace("db_backup/", "")
+
+    if filename.endswith(".zip"):
+        file_content = obj["Body"].read()
+        with zipfile.ZipFile(io.BytesIO(file_content)) as zfile:
+            # Search for a JSON file inside the ZIP
+            json_file = next((f for f in zfile.namelist() if f.endswith(".json")), None)
+            if not json_file:
+                raise ValueError("No JSON file found inside the ZIP archive.")
+            with zfile.open(json_file) as json_data:
+                smartapis = json.load(json_data)
+    elif filename.endswith(".json"):
+        smartapis = json.loads(obj["Body"].read())
+    else:
+        raise Exception("Unsupported backup file type!")
+
     _restore(smartapis)


 def restore_from_file(filename):
-    with open(filename) as file:
-        smartapis = json.load(file)
-    _restore(smartapis)
+    if filename.endswith(".zip"):
+        with zipfile.ZipFile(filename, 'r') as zfile:
+            # Search for a JSON file inside the ZIP
+            json_file = next((f for f in zfile.namelist() if f.endswith(".json")), None)
+            if not json_file:
+                raise ValueError("No JSON file found inside the ZIP archive.")
+            with zfile.open(json_file) as json_data:
+                smartapis = json.load(json_data)
+    elif filename.endswith(".json"):
+        with open(filename) as file:
+            smartapis = json.load(file)
+    else:
+        raise Exception("Unsupported backup file type!")
+
+    _restore(smartapis)


 def refresh_document():
@@ -226,7 +284,7 @@ def refresh_has_metakg():
 _lock = FileLock(".lock", timeout=0)


-def routine(no_backup=False):
+def routine(no_backup=False, format="zip"):
     logger = logging.getLogger("routine")

     # Add jitter: random delay between 100 and 500 milliseconds (adjust range as needed)
@@ -244,8 +302,8 @@ def routine(no_backup=False):
        if lock_acquired:
            logger.info("Schedule lock acquired successfully.")
            if not no_backup:
-               logger.info("backup_to_s3()")
-               backup_to_s3()
+               logger.info(f"backup_to_s3(format={format})")
+               backup_to_s3(format=format)
            logger.info("refresh_document()")
            refresh_document()
            logger.info("check_uptime()")
@@ -262,6 +320,7 @@ def routine(no_backup=False):
            logger.warning("Schedule lock acquired by another process. No need to run it in this process.")
    except Exception as e:
        logger.error(f"An error occurred during the routine: {e}")
+       logger.error("Stack trace:", exc_info=True)
    finally:
        if lock_acquired:
            _lock.release()
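A small round trip exercises the new zip path in save_to_file and mirrors how restore_from_file locates the JSON member inside the archive. A minimal sketch with toy data, assuming src/ is on PYTHONPATH; the mapping and file name are illustrative only:

import json
import zipfile

from admin import save_to_file  # assumes src/ is on PYTHONPATH

# Toy mapping standing in for a real index dump.
mapping = {"example": {"url": "https://example.org/openapi.json"}}
save_to_file(mapping, filename="smartapi_test.zip", format="zip")

# Mirror of the restore path: locate the first .json member and load it.
with zipfile.ZipFile("smartapi_test.zip") as zfile:
    json_file = next(f for f in zfile.namelist() if f.endswith(".json"))
    with zfile.open(json_file) as fh:
        assert json.load(fh) == mapping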
