-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathcodeclip.py
More file actions
357 lines (312 loc) · 12.5 KB
/
codeclip.py
File metadata and controls
357 lines (312 loc) · 12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
#!/usr/bin/env python3
# codeclip.py
# A lightweight Python script to copy an entire codebase to the system clipboard or file
# Designed for sharing code with LLMs or other text-based tools
# Version: 1.3.2
import os
import sys
import time
import logging
import signal
import pyperclip
import argparse
import pathspec
import configparser
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
# Setup logging - will be configured based on command line arguments
logger = logging.getLogger("codeclip")
# Module-wide flag: raised on the first Ctrl+C so worker threads can wind
# down gracefully instead of being killed mid-write.
cancel_requested = False
def signal_handler(sig, frame):
    """Handle Ctrl+C (SIGINT) gracefully.

    First interrupt: request a graceful shutdown by raising the module
    flag.  Second interrupt: abandon cleanup and terminate immediately.
    """
    global cancel_requested
    if cancel_requested:
        # Second Ctrl+C — the user insists; bail out right now.
        print("\nForced exit. Some operations may not complete cleanly.")
        sys.exit(1)
    print("\n\nCancellation requested. Finishing current tasks and cleaning up...")
    cancel_requested = True
# Route SIGINT through the graceful handler above.
signal.signal(signal.SIGINT, signal_handler)
def load_config(config_file="codeclip.ini"):
    """Load settings from a config file if it exists.

    Returns the parser's DEFAULT section: built-in defaults, overlaid
    with whatever the ini file provides when one is present.
    """
    parser = configparser.ConfigParser()
    parser["DEFAULT"] = {
        "max_size_kb": "1024",
        "ignore_file": ".clipignore",
        "chunk_size": "50",
        "threads": "4",
    }
    if os.path.exists(config_file):
        parser.read(config_file)
        print(f"Loaded config from {config_file}")
    return parser["DEFAULT"]
def load_ignore_patterns(ignore_file=".clipignore"):
    """Load ignore patterns from the specified ignore file.

    Blank lines and lines beginning with '#' (in column 0 — matching the
    original behavior, indented comments are NOT excluded) are skipped.
    Returns a compiled gitignore-style PathSpec matcher.
    """
    collected = []
    if not os.path.exists(ignore_file):
        print(f"No {ignore_file} found. All files will be included unless filtered.")
    else:
        with open(ignore_file, "r", encoding="utf-8") as handle:
            for raw in handle:
                cleaned = raw.strip()
                # NOTE: the comment test uses the raw line, so only
                # column-0 '#' lines are treated as comments.
                if cleaned and not raw.startswith("#"):
                    collected.append(cleaned)
        print(f"Found {ignore_file} with {len(collected)} patterns.")
    return pathspec.PathSpec.from_lines("gitwildmatch", collected)
def is_binary_file(file_path):
    """Check if a file is binary based on extension or content.

    A file is treated as binary when its extension is a known binary
    type, when the first 1 KB contains a NUL byte, or when that sample
    is not valid UTF-8.  The original high-bit heuristic
    (``any(b >= 128 ...)``) misclassified perfectly valid UTF-8 text
    (accented characters, CJK, emoji) as binary and silently dropped it
    from the export; a decode probe keeps such files.

    Returns False when the file cannot be read at all, preserving the
    original best-effort behavior (unreadable files surface later as
    per-file read errors instead of being skipped here).
    """
    binary_extensions = {
        '.exe', '.dll', '.obj', '.bin', '.dat', '.png', '.jpg', '.jpeg',
        '.gif', '.bmp', '.ico', '.pdf', '.zip', '.rar', '.7z', '.tar',
        '.gz', '.mp3', '.mp4', '.avi', '.mov', '.wmv', '.psd', '.so',
        '.woff', '.woff2', '.eot', '.ttf', '.otf', '.pyc', '.pyo', '.o',
        '.svgz', '.msi', '.jar', '.war', '.ear', '.class'
    }
    ext = os.path.splitext(file_path.lower())[1]
    if ext in binary_extensions:
        return True
    try:
        with open(file_path, "rb") as f:
            chunk = f.read(1024)
    except Exception:
        return False
    if b"\0" in chunk:
        # NUL bytes essentially never appear in text files.
        return True
    try:
        chunk.decode("utf-8")
    except UnicodeDecodeError as err:
        # A failure within the last 3 bytes may just be a multi-byte
        # sequence split at the 1 KB read boundary — not proof of binary.
        return err.start < len(chunk) - 3
    return False
def create_default_ignore(ignore_file=".clipignore"):
    """Write a starter ignore file with common junk patterns.

    The content mirrors a typical .gitignore: VCS metadata, build
    output, dependency directories, IDE droppings, logs, and OS cruft.
    Callers invoke this only when the target file does not yet exist.
    """
    starter_content = """# CodeClip ignore file
# Lines starting with # are comments
# Supports .gitignore-style patterns
# Version control
.git/
.svn/
.hg/
# Build outputs
bin/
obj/
build/
dist/
# Dependencies
node_modules/
packages/
vendor/
.venv/
venv/
env/
__pycache__/
# IDE files
.vs/
.vscode/
.idea/
*.suo
*.user
*.userosscache
*.sln.docstates
# Logs and databases
*.log
*.sqlite
*.db
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
"""
    with open(ignore_file, "w", encoding="utf-8") as handle:
        handle.write(starter_content)
    print(f"Created default {ignore_file}")
def process_file(file_path, root_dir, ignore_spec, max_size_kb, verbose=False):
    """Process a single file and return (lines, ignored, errored, too_large).

    lines     -- list of output lines for this file, or None when the file
                 contributes nothing (ignored pattern, binary, cancelled).
    ignored   -- True when the file was skipped (ignore match, binary file,
                 or a pending cancellation).
    errored   -- True when the file could not be stat'ed or read.
    too_large -- True when the file exceeds max_size_kb.

    Fixes vs. the original: the cancelled path now returns None like every
    other skip path (it returned [] before); the file size is stat'ed once
    instead of twice; and an OSError from getsize (file deleted between
    the directory walk and processing) is reported as a per-file error
    instead of crashing the worker future.
    """
    global cancel_requested
    if cancel_requested:
        # Drain quickly once cancellation is requested; count as ignored
        # so the summary counters stay consistent with other skip paths.
        return None, True, False, False
    rel_path = os.path.relpath(file_path, root_dir)
    if ignore_spec.match_file(rel_path):
        if verbose:
            print(f"Ignored: {rel_path}")
        return None, True, False, False
    if is_binary_file(file_path):
        if verbose:
            print(f"Binary skipped: {rel_path}")
        return None, True, False, False
    try:
        # Stat once and reuse — the original called getsize twice, which is
        # wasteful and racy if the file changes between calls.
        size_kb = os.path.getsize(file_path) / 1024
    except OSError as e:
        # File vanished between os.walk and processing.
        logging.getLogger("codeclip").warning(f"Error reading {rel_path}: {str(e)}")
        if verbose:
            print(f"Error: {rel_path} - {str(e)}")
        return [f"{rel_path}", f"[Error reading file: {str(e)}]", "---"], False, True, False
    if size_kb > max_size_kb:
        if verbose:
            print(f"Too large: {rel_path}")
        return [f"{rel_path}", f"[File too large: {size_kb:.2f} KB]", "---"], False, False, True
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        if verbose:
            print(f"Added: {rel_path}")
        return [f"{rel_path}", content, "---"], False, False, False
    except Exception as e:
        logging.getLogger("codeclip").warning(f"Error reading {rel_path}: {str(e)}")
        if verbose:
            print(f"Error: {rel_path} - {str(e)}")
        return [f"{rel_path}", f"[Error reading file: {str(e)}]", "---"], False, True, False
def display_log_file(log_file="codeclip.log"):
    """Display the contents of the log file.

    Prints a notice when the file is missing or empty; otherwise dumps
    the full log between banner lines.  Read failures are reported, not
    raised.
    """
    if not os.path.exists(log_file):
        print(f"No log file found at {log_file}")
        return
    banner = "=" * 50
    try:
        with open(log_file, "r", encoding="utf-8") as handle:
            text = handle.read()
        if not text.strip():
            print("Log file exists but is empty.")
            return
        print("\n" + banner)
        print(f"Contents of {log_file}:")
        print(banner)
        print(text)
        print(banner)
    except Exception as e:
        print(f"Error reading log file: {str(e)}")
def copy_codebase_to_clipboard(create_ignore=False, max_size_kb=1024, verbose=False, chunk_size=50, threads=4, output_to_file=False, show_log=False):
    """Copy codebase to clipboard or file with chunking, multithreading, and progress bar.

    Parameters:
        create_ignore  -- create a default .clipignore when none exists.
        max_size_kb    -- per-file size cap; larger files get a placeholder.
        verbose        -- print a line for each processed file.
        chunk_size     -- buffered output lines before flushing into the result.
        threads        -- worker threads for file processing.
        output_to_file -- write a dated .txt instead of using the clipboard.
        show_log       -- enable codeclip.log and display it if errors occur.

    Settings in codeclip.ini (when present) override the corresponding
    arguments; otherwise the caller-supplied values are used as-is.
    """
    global cancel_requested
    # Configure logging based on show_log parameter
    logger = logging.getLogger("codeclip")
    if show_log:
        # Attach the file handler only once so repeated calls in one process
        # don't write duplicate log lines.
        if not any(isinstance(h, logging.FileHandler) for h in logger.handlers):
            handler = logging.FileHandler("codeclip.log")
            handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
            logger.addHandler(handler)
        logger.setLevel(logging.WARNING)
    else:
        # Silence "no handler" warnings when logging is disabled.
        logger.addHandler(logging.NullHandler())
    # BUG FIX: load_config() bakes the hard-coded defaults into the parser's
    # DEFAULT section, so the old `config.get(key, cli_value)` calls always
    # returned the default and silently discarded the command-line arguments
    # (--max-size, --chunk-size, --threads were no-ops).  Read the ini file
    # directly and override a setting only when the file actually defines it.
    config_file = "codeclip.ini"
    file_config = configparser.ConfigParser()
    if os.path.exists(config_file):
        file_config.read(config_file)
        print(f"Loaded config from {config_file}")
    settings = file_config["DEFAULT"]
    ignore_file = settings.get("ignore_file", ".clipignore")
    max_size_kb = int(settings.get("max_size_kb", max_size_kb))
    chunk_size = int(settings.get("chunk_size", chunk_size))
    threads = int(settings.get("threads", threads))
    root_dir = os.getcwd()
    if create_ignore and not os.path.exists(ignore_file):
        create_default_ignore(ignore_file)
    start_time = time.time()
    ignore_spec = load_ignore_patterns(ignore_file)
    output = []
    # Header
    output.append("# CodeClip Export")
    output.append(f"# Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    output.append(f"# Directory: {root_dir}")
    output.append("# ----------------------------------------")
    output.append("")
    print("Scanning directory for files...")
    print("Press Ctrl+C at any time to cancel the process.")
    all_files = [os.path.join(root, f) for root, _, files in os.walk(root_dir) for f in files]
    total_files = len(all_files)
    print(f"Found {total_files} total files. Processing with {threads} threads in chunks of {chunk_size}...")
    file_count = ignored_count = error_count = too_large_count = 0
    chunked_output = []
    # Process files with multithreading and progress bar
    with ThreadPoolExecutor(max_workers=threads) as executor:
        future_to_file = {executor.submit(process_file, file_path, root_dir, ignore_spec, max_size_kb, verbose): file_path for file_path in all_files}
        with tqdm(total=total_files, desc="Processing files", unit="file") as pbar:
            for future in as_completed(future_to_file):
                if cancel_requested:
                    # Best-effort: cancel futures that have not started yet.
                    for pending in future_to_file:
                        pending.cancel()
                    break
                lines, ignored, errored, too_large = future.result()
                if ignored:
                    ignored_count += 1
                elif errored:
                    error_count += 1
                elif too_large:
                    too_large_count += 1
                else:
                    file_count += 1
                if lines:  # None (or empty) for skipped files
                    chunked_output.extend(lines)
                pbar.update(1)
                # Flush the buffer into the result once it reaches chunk_size.
                if len(chunked_output) >= chunk_size:
                    output.extend(chunked_output)
                    chunked_output = []
    # Append any remaining chunk
    if chunked_output:
        output.extend(chunked_output)
    # If cancelled, add a note
    if cancel_requested:
        output.append("\n# NOTE: This export was cancelled before completion.")
        print("\nProcess was cancelled. Saving partial results...")
    final_text = "\n".join(output)
    duration = time.time() - start_time
    clip_size = len(final_text.encode("utf-8")) / 1024
    output_file = None
    if output_to_file or cancel_requested:
        # Generate dated output filename
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        output_file = f"codeclip_{timestamp}.txt"
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(final_text)
        print("")
        if cancel_requested:
            print(f"✓ Partial results saved! Wrote {file_count} files to '{output_file}' in '{root_dir}'.")
        else:
            print(f"✓ Success! Wrote {file_count} files to '{output_file}' in '{root_dir}'.")
    else:
        try:
            pyperclip.copy(final_text)
            print("")
            print(f"✓ Success! Copied {file_count} files to clipboard from '{root_dir}'.")
        except pyperclip.PyperclipException as e:
            # Headless / clipboard-less environments: fall back to a file.
            print(f"Clipboard error: {e}")
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            output_file = f"codeclip_{timestamp}.txt"
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(final_text)
            print(f"Saved to {output_file} instead due to clipboard failure.")
    print(f" • Processed in {duration:.2f} seconds")
    print(f" • Ignored: {ignored_count} files")
    if too_large_count:
        print(f" • Too large: {too_large_count} files")
    if error_count:
        print(f" • Errors: {error_count} files (see codeclip.log)")
    print(f" • Output size: ~{clip_size:.2f} KB")
    print("")
    if output_file:
        print(f"Output saved to {output_file}")
    else:
        print("Paste with Ctrl+V into your desired tool.")
    # Display log file if requested (logging is automatically enabled with show_log)
    if show_log and error_count > 0:
        display_log_file()
if __name__ == "__main__":
    # Command-line entry point: parse flags, then run the export once.
    cli = argparse.ArgumentParser(description="Copy codebase to clipboard or file.")
    cli.add_argument("--create-ignore", action="store_true", help="Create a default .clipignore.")
    cli.add_argument("--max-size", type=int, default=1024, help="Max file size in KB (default: 1024).")
    cli.add_argument("--verbose", action="store_true", help="Show detailed processing info.")
    cli.add_argument("--chunk-size", type=int, default=50, help="Number of files per chunk (default: 50).")
    cli.add_argument("--threads", type=int, default=4, help="Number of threads (default: 4).")
    cli.add_argument("--output-to-file", action="store_true", help="Save to a dated file instead of clipboard.")
    cli.add_argument("--log", action="store_true", help="Enable logging to codeclip.log file and display it after execution if errors occur.")
    options = cli.parse_args()
    copy_codebase_to_clipboard(
        create_ignore=options.create_ignore,
        max_size_kb=options.max_size,
        verbose=options.verbose,
        chunk_size=options.chunk_size,
        threads=options.threads,
        output_to_file=options.output_to_file,
        show_log=options.log,
    )