Как удалить повторяющиеся файлы с Google Drive на Python
def _list_children(service, parent_id, page_size=1000):
    """Return ``(name, id)`` tuples for every entry directly inside a Drive folder.

    Follows ``nextPageToken`` until the listing is exhausted, so a folder with
    more entries than fit in one response is still returned completely.

    Args:
        service: Drive v3 service object as returned by ``build``.
        parent_id: ID of the folder whose children are listed.
        page_size: Maximum number of entries requested per API call.

    Returns:
        A list of ``(name, id)`` tuples in the order the API returned them.
    """
    query = "'{}' in parents".format(parent_id)
    children = []
    page_token = None
    while True:
        response = service.files().list(
            q=query,
            pageSize=page_size,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        children.extend(
            (item['name'], item['id']) for item in response.get('files', [])
        )
        page_token = response.get('nextPageToken')
        if not page_token:
            return children


def _download_file(service, file_id, destination):
    """Download one Drive file to ``destination``, printing progress per chunk."""
    request = service.files().get_media(fileId=file_id)
    # Stream straight into the target file instead of buffering the whole
    # payload in memory first.
    with open(destination, 'wb') as out:
        downloader = MediaIoBaseDownload(out, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download: {}".format(int(status.progress() * 100)))


def _clear_directory(directory):
    """Delete every non-hidden entry inside ``directory``; keep the directory.

    Removes the same entries a shell ``rm -rf directory/*`` would match, but
    without spawning a shell, so paths containing spaces or shell
    metacharacters are handled correctly. Unlike the shell command, a failed
    deletion raises ``OSError`` instead of being silently ignored.
    """
    import glob
    import shutil

    # ``*`` does not match dot-files, mirroring the shell glob.
    for path in glob.glob(os.path.join(glob.escape(directory), '*')):
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)


def main(temporary_workspace, workspace):
    """Download each Drive subfolder's files and run ``convert_all_netcdf`` on them.

    Authenticates with the stored OAuth token (running the interactive flow
    when the token is missing or invalid), lists the children of the main
    folder, and processes them sorted by name. For every subfolder, all of its
    files are downloaded into ``temporary_workspace``, ``convert_all_netcdf``
    is called, and the temporary directory is emptied again.

    Args:
        temporary_workspace: Existing directory used as scratch space for the
            downloaded files; its non-hidden contents are deleted after each
            subfolder has been converted.
        workspace: Passed through unchanged to ``convert_all_netcdf``.

    Raises:
        RuntimeError: If the main folder or any subfolder has no entries.
        SystemExit: If the same file ID is encountered twice.
    """
    store = file.Storage('tokenRead.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))

    folders = _list_children(service, 'MAIN_FOLDER_WITH_SUBFOLDERS_ID')
    if not folders:
        raise RuntimeError('No files found.')
    folders.sort(key=lambda entry: entry[0])

    # A set gives O(1) duplicate checks across all folders.
    seen_file_ids = set()
    for folder_name, folder_id in folders:
        # The last three characters of the folder name are dropped to form the
        # value handed to convert_all_netcdf.
        start_date = folder_name[:-3]
        print('Folder: ', start_date, ', ID: ', folder_id)

        files = _list_children(service, folder_id)
        if not files:
            raise RuntimeError('No files found.')

        for file_name, file_id in files:
            if file_id in seen_file_ids:
                print('Duplicate file ID!')
                # Same effect as exit() with no argument, without relying on
                # the helper that the site module injects.
                raise SystemExit
            seen_file_ids.add(file_id)
            print('\tFile: ', file_name, ', ID: ', file_id)
            _download_file(
                service, file_id, os.path.join(temporary_workspace, file_name)
            )

        # NOTE(review): the source had lost its indentation; conversion and
        # cleanup are assumed to run once per subfolder because they depend on
        # that folder's start_date -- confirm.
        convert_all_netcdf(temporary_workspace, start_date, workspace,
                           r'Qout_south_america_continental',
                           num_of_rivids=62317)
        _clear_directory(temporary_workspace)
Stupid Shrike