LINUX COMMAND LINE
WGET:
wget --no-clobber --convert-links --random-wait -r -p -E -e robots=off -U mozilla [URL]
wget --no-clobber --convert-links --random-wait -r -p -E -e robots=off -U mozilla -i [PATHTOFILE]
GREP:
grep -r -n -i -C 10 -F -f [PATHTOKEYWORDLIST] [PATHTOSEARCH] >> [PATHTORESULT]
grep -oP 'PATTERNTOSEARCH:[^"]+' [PATHTOTEXTTOSEARCH] >> [PATHTORESULT]
PDFGREP:
pdfgrep -C10 -e [KEYWORD] -r >> [PATHTORESULT]
YOUTUBE-DL:
youtube-dl -a [PATHTOURLLIST]
RECOLL EXPORT:
#####Export research from recoll#####
#####If you want sed to remove a special character such as / from a file, you need to escape it with a backslash (\).#####
#####sed -i 's/<character>//g' file.txt#####
#####sed -i 's/<character>/<replacement>/g' file.txt#####
grep -oP 'file://[^"]+' search-original.txt >> export-path.txt
sed -i 's/file:\/\///g' export-path.txt
cp export-path.txt export-newpath.txt
sed -i 's/<ORIGINALPATH>/<NEWPATH>/g' export-newpath.txt
cp export-newpath.txt export-newpath-folderonly.txt
#####Then run the copy_files.py script#####
#####copy_files.py#####
import os
def copy_files(pathsour_file, pathdest_file):
    """Copy files pairwise from a source path list to a destination path list.

    Line *i* of ``pathsour_file`` names a file to copy and line *i* of
    ``pathdest_file`` names where it goes.  The two files are read in
    lockstep, so processing stops at the end of the shorter one.

    Args:
        pathsour_file: Text file with one source path per line.
        pathdest_file: Text file with one destination path per line.

    Returns:
        None. Progress and errors are reported on standard output.
    """
    # Function-scope import: the script's import header only provides ``os``.
    import shutil

    # Check if the pathlist file exists
    if not os.path.exists(pathsour_file):
        print(f"Error: Pathlist file '{pathsour_file}' not found.")
        return
    # Check if the pathdest file exists
    if not os.path.exists(pathdest_file):
        print(f"Error: Pathdest file '{pathdest_file}' not found.")
        return

    with open(pathsour_file, 'r') as pathlist, open(pathdest_file, 'r') as pathdest:
        for source_line, destination_line in zip(pathlist, pathdest):
            source_path = source_line.strip()
            destination_path = destination_line.strip()
            if not os.path.exists(source_path):
                # Blank lines are skipped quietly; a named but missing source
                # is reported so the gap does not go unnoticed.
                if source_path:
                    print(f"Skipped: source '{source_path}' not found.")
                continue
            try:
                # A bare file name has no directory part, and
                # os.makedirs('') would raise.
                parent_dir = os.path.dirname(destination_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)
                # shutil.copy instead of a shell `cp` string: no quoting or
                # injection problems with ", $ or ` in paths, and failures
                # surface as OSError instead of an ignored exit status.
                shutil.copy(source_path, destination_path)
            except OSError as e:
                print(f"Error: Unable to copy '{source_path}' to '{destination_path}': {e}")
            else:
                print(f"Copied '{source_path}' to '{destination_path}'")
# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    pathsour_file = "export-path.txt"  # list of source paths
    pathdest_file = "export-newpath.txt"  # list of destination paths
    copy_files(pathsour_file=pathsour_file, pathdest_file=pathdest_file)
PYTHON SCRIPTS
#####DOWNLOAD YT VIDEOS#####
from pytube import YouTube
import os
def download_videos(file_path, download_path):
    """Download every video listed in a text file into a directory.

    Args:
        file_path: Text file containing one video URL per line.  Blank
            lines are ignored.
        download_path: Directory the videos are saved to; created if it
            does not exist yet.

    Returns:
        None. Progress and per-URL errors are reported on standard output.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_path, exist_ok=True)

    # Read the video URLs from the file, dropping surrounding whitespace so
    # messages do not carry a stray newline, and skipping empty lines.
    with open(file_path, 'r') as file:
        video_urls = [line.strip() for line in file]

    for url in video_urls:
        if not url:
            continue
        # Deliberately broad: one failing URL must not abort the remaining
        # downloads, so every error is reported and the loop continues.
        try:
            yt = YouTube(url)
            video = yt.streams.get_highest_resolution()
            print(f"Downloading {yt.title}...")
            video.download(download_path)
            print("Download completed successfully!\n")
        except Exception as e:
            print(f"Error downloading {url}: {e}\n")
# Script entry point: both values are placeholders to fill in before running.
if __name__ == "__main__":
    file_path = "[FILEPATH]"  # text file holding the video URLs
    download_path = "[DOWNLOADPATH]"  # directory that receives the downloads
    download_videos(file_path=file_path, download_path=download_path)
#####SORT AND MOVE FILES PER EXTENSIONS#####
import os
import shutil
def sort_files_recursive_from_file(file_path, extension_file):
    """Sort the files of every folder listed in a text file.

    Each line of ``file_path`` is taken as a folder path.  Existing folders
    are sorted by extension and then cleaned of empty sub-folders; lines
    that do not name a directory are reported and skipped.

    Args:
        file_path: Text file with one folder path per line.
        extension_file: Path handed on unchanged to ``sort_files_recursive``.
    """
    with open(file_path, 'r') as listing:
        raw_entries = listing.readlines()

    for raw_entry in raw_entries:
        # Drop the newline and any surrounding whitespace.
        target_dir = raw_entry.strip()
        if os.path.isdir(target_dir):
            sort_files_recursive(target_dir, extension_file)
            delete_empty_folders(target_dir)
        else:
            print(f"Invalid folder path: {target_dir}")
def sort_files_recursive(folder_path, extension_file):
    """Move every file under ``folder_path`` into a per-extension sub-folder.

    A file whose extension appears in ``extension_file`` is moved to
    ``<folder_path>/<extension without dot, lower-cased>``; every other file
    is moved to ``<folder_path>/other``.  Name clashes in the destination
    are resolved by appending ``_1``, ``_2``, ... to the base name.

    Args:
        folder_path: Root folder to walk recursively.
        extension_file: Text file with one extension per line.  Entries are
            case-insensitive, may be written with or without the leading
            dot, and blank lines are ignored.
    """
    other_folder = os.path.join(folder_path, "other")
    os.makedirs(other_folder, exist_ok=True)

    # Normalise the list once into {".ext": "ext"} for O(1) lookups.
    # Skipping empty entries matters: an empty entry would otherwise match
    # extension-less files and route them to the root instead of "other".
    folder_by_extension = {}
    with open(extension_file, 'r') as ext_file:
        for line in ext_file:
            ext_name = line.strip().lower().lstrip('.')
            if ext_name:
                folder_by_extension.setdefault('.' + ext_name, ext_name)

    for root, dirs, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)
            if not os.path.isfile(file_path):
                continue

            base_name, file_ext = os.path.splitext(filename)
            subfolder = folder_by_extension.get(file_ext.lower())
            if subfolder is None:
                dest_folder = other_folder  # Default to "other" folder
            else:
                dest_folder = os.path.join(folder_path, subfolder)

            if os.path.abspath(dest_folder) == os.path.abspath(root):
                # File is already in the correct destination folder
                print(f"File {filename} is already in the proper folder: {dest_folder}")
                continue

            os.makedirs(dest_folder, exist_ok=True)
            dest_file_path = os.path.join(dest_folder, filename)
            if os.path.exists(dest_file_path):
                # Pick the first free "<name>_<n><ext>" so nothing is
                # overwritten by the move.
                count = 1
                while os.path.exists(dest_file_path):
                    new_filename = f"{base_name}_{count}{file_ext}"
                    dest_file_path = os.path.join(dest_folder, new_filename)
                    count += 1
                print(f"Renaming {filename} to {new_filename}")

            shutil.move(file_path, dest_file_path)
            print(f"Moved {filename} to {dest_file_path}")
def delete_empty_folders(root_folder):
    """Remove every empty directory found below ``root_folder``.

    The tree is walked bottom-up, so a directory that becomes empty once its
    own empty children are removed is deleted as well.  ``root_folder``
    itself is never removed.
    """
    for parent, subfolders, _files in os.walk(root_folder, topdown=False):
        for name in subfolders:
            candidate = os.path.join(parent, name)
            if os.listdir(candidate):
                # Still has content: keep it.
                continue
            os.rmdir(candidate)
            print(f"Deleted empty folder: {candidate}")
# Usage example: replace both placeholder values before running.
file_path = "FILESANDFOLDERSPATH"  # file that lists the folder paths to process
extension_file = "[EXTENSIONFILEPATH]"  # file that lists the extensions to sort by
sort_files_recursive_from_file(file_path=file_path, extension_file=extension_file)
#####GENERATE DIRECTORY TO HTML#####
import os
# Static start of the generated page: styles, the client-side search script
# and the search form.  The script hides non-matching files and then hides
# every folder item that has no visible file nested inside it.
_DIRECTORY_LISTING_HEAD = (
    '<!DOCTYPE html>\n<html>\n<head>\n<title>Directory Listing</title>\n'
    '<style>\n'
    'ul { list-style-type: none; }\n'
    '</style>\n'
    '<script>\n'
    'function performSearch() {\n'
    '  var searchInput = document.getElementById("search");\n'
    '  var searchTerm = searchInput.value.trim();\n'
    '  var fileElements = document.getElementsByClassName("file");\n'
    '  var folderElements = document.getElementsByClassName("folder");\n'
    '  for (var i = 0; i < fileElements.length; i++) {\n'
    '    var fileElement = fileElements[i];\n'
    '    var matches = searchTerm === "" || fileElement.textContent.includes(searchTerm);\n'
    '    fileElement.style.display = matches ? "block" : "none";\n'
    '  }\n'
    '  for (var j = 0; j < folderElements.length; j++) {\n'
    '    var folderElement = folderElements[j];\n'
    '    var nestedFiles = folderElement.getElementsByClassName("file");\n'
    '    var hasVisibleFiles = false;\n'
    '    for (var k = 0; k < nestedFiles.length; k++) {\n'
    '      if (nestedFiles[k].style.display !== "none") {\n'
    '        hasVisibleFiles = true;\n'
    '        break;\n'
    '      }\n'
    '    }\n'
    '    folderElement.style.display = (searchTerm === "" || hasVisibleFiles) ? "block" : "none";\n'
    '  }\n'
    '  return false;\n'
    '}\n'
    '</script>\n'
    '</head>\n<body>\n'
    '<h1>Directory Listing</h1>\n'
    '<form onsubmit="return performSearch()">\n'
    '<label for="search">Search:</label>\n'
    '<input type="text" name="search" id="search" value="" />\n'
    '<input type="submit" value="Search" />\n'
    '</form>\n'
    '<ul>\n'
)


def generate_directory_listing(directory_path):
    """Build a searchable HTML page listing every file under a directory.

    The directory is walked recursively.  Each visited folder becomes a
    ``<li class="folder">`` item containing a nested list of its files,
    each rendered as a ``<li class="file">`` with a ``file://`` link.  The
    page ends with the total number of files, folders and the combined
    file size in MB.

    Args:
        directory_path: Root directory to list.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope imports: the script's import header only provides ``os``.
    import html
    from pathlib import Path

    file_count = 0
    folder_count = 0
    total_file_size = 0
    # Collect fragments and join once instead of growing a string with +=.
    parts = [_DIRECTORY_LISTING_HEAD]

    for root, dirs, files in os.walk(directory_path):
        file_count += len(files)
        folder_count += len(dirs)
        # Files are nested inside their folder item so the search script can
        # tell which folders still contain a visible file.
        parts.append(
            '<li class="folder" style="display: block;">'
            f'<strong>Current Directory:</strong> {html.escape(root)}\n<ul>\n'
        )
        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                total_file_size += os.path.getsize(file_path)
            except OSError:
                # e.g. a dangling symlink: still listed, contributes no size.
                pass
            # as_uri() needs an absolute path and percent-encodes it, giving
            # a valid link even for names with spaces or special characters.
            href = Path(os.path.abspath(file_path)).as_uri()
            parts.append(
                '<li class="file" style="display: block;">'
                f'<a href="{html.escape(href)}" target="_blank">'
                f'{html.escape(file_name)}</a></li>\n'
            )
        parts.append('</ul>\n</li>\n')

    parts.append('</ul>\n')
    parts.append(f'<p><strong>Total Files:</strong> {file_count}</p>\n')
    parts.append(f'<p><strong>Total Folders:</strong> {folder_count}</p>\n')
    parts.append(
        f'<p><strong>Total File Size:</strong> {total_file_size / (1024 * 1024):.2f} MB</p>\n'
    )
    parts.append('</body>\n</html>\n')
    return ''.join(parts)
# Directory to list (placeholder to replace before running).
directory_path = "[DIRECTORYPATH]"
# Where the generated page is written.
output_file_path = 'directory_listing.html'
# Build the page, then store it as UTF-8 text.
html_content = generate_directory_listing(directory_path)
with open(output_file_path, mode='w', encoding='utf-8') as output_file:
    output_file.write(html_content)
print(f"Directory listing HTML saved to: {output_file_path}")