course-ally/app.py at main · Asi0Flammeus/course-ally · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from flask import Flask, render_template, request, jsonify, Response, send_file
from flask_cors import CORS
import json
import time
import threading
import queue
import tempfile
from pathlib import Path
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv

# Import your existing modules
from course_components.downloader import YouTubeDownloader
from course_components.transcription import TranscriptionService, TranscriptionResult
from course_components.chapter_generator import ChapterGenerator
from course_components.quiz_generator import QuizGenerator
from course_components.quiz_workflow import QuizWorkflowManager
from course_components.utils import detect_youtube_url_type
from course_components.course_editor import CourseEditor

# Load environment variables via python-dotenv before the app is created.
load_dotenv()

# The Flask application object; CORS(app) applies flask-cors to it with its
# default settings.
app = Flask(__name__)
CORS(app)

# Per-session progress queues for SSE: session_id -> queue.Queue holding
# JSON-encoded progress messages (created by create_progress_queue, filled by
# send_progress).
progress_queues = {}
# Per-session cancellation state: session_id -> {'cancelled': bool}. Handlers
# add an entry when they start a job, /api/cancel sets the flag, and each
# background worker removes its entry when it finishes.
active_processes = {}

def create_progress_queue():
    """Register a new, empty progress queue and return the session id keying it.

    The session id is a random UUID4 rendered as a string; the queue is stored
    in the module-level ``progress_queues`` registry under that id.
    """
    from uuid import uuid4

    new_session = str(uuid4())
    progress_queues[new_session] = queue.Queue()
    return new_session

def send_progress(session_id, message, status="processing", percentage=None):
    """Queue a JSON-encoded progress update for the given session.

    Args:
        session_id: key into ``progress_queues``; unknown ids are ignored.
        message: human-readable progress text.
        status: status label stored alongside the message.
        percentage: optional progress value; omitted from the payload when None.
    """
    target = progress_queues.get(session_id)
    if target is None:
        # No queue registered for this session: nothing to deliver to.
        return

    payload = {"message": message, "status": status, "timestamp": time.time()}
    if percentage is not None:
        payload["percentage"] = percentage
    target.put(json.dumps(payload))

@app.route('/')
def index():
    """Serve the root page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/api/cancel/<session_id>', methods=['POST'])
def cancel_process(session_id):
    """Flag a running job as cancelled.

    Returns ``{"status": "cancelled"}`` when the session is currently tracked
    in ``active_processes``, otherwise ``{"status": "not_found"}`` with a 404.
    """
    # Guard clause: unknown (or already finished) sessions cannot be cancelled.
    if session_id not in active_processes:
        return jsonify({"status": "not_found"}), 404

    # Workers poll this flag between steps; also tell the client right away.
    active_processes[session_id]['cancelled'] = True
    send_progress(session_id, "🛑 Process cancelled by user", "error", 100)
    return jsonify({"status": "cancelled"})

@app.route('/api/playlist-to-md', methods=['POST'])
def playlist_to_md():
    """Convert a YouTube playlist into a markdown file of video links.

    Expects a JSON object body with:
        playlist_url: URL of the playlist (required).
        subfolder: optional directory under ``outputs/playlist_to_md``.

    The work runs in a background thread; progress is published through
    ``send_progress`` under the returned session id.

    Returns:
        ``{"session_id": ...}`` once the worker thread has been started, or a
        JSON error with status 400 when the body is not a JSON object or
        ``playlist_url`` is missing.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    playlist_url = data.get('playlist_url')
    subfolder = data.get('subfolder', None)

    if not playlist_url:
        return jsonify({"error": "No playlist URL provided"}), 400

    session_id = create_progress_queue()
    active_processes[session_id] = {'cancelled': False}

    def is_cancelled():
        """Return True once /api/cancel has flagged this session."""
        return active_processes.get(session_id, {}).get('cancelled', False)

    def is_within(base, candidate):
        """Return True when candidate is base itself or lies beneath it."""
        base_abs = Path(os.path.abspath(base))
        candidate_abs = Path(os.path.abspath(candidate))
        return candidate_abs == base_abs or base_abs in candidate_abs.parents

    def process():
        try:
            if is_cancelled():
                return

            send_progress(session_id, "🔗 Starting playlist extraction...", "processing", 10)

            downloader = YouTubeDownloader()

            # Set up output directory
            base_path = Path('outputs') / 'playlist_to_md'
            output_path = base_path / subfolder if subfolder else base_path
            # subfolder is client input: refuse absolute paths / '..' that
            # would place the file outside the output tree.
            if not is_within(base_path, output_path):
                send_progress(session_id, "❌ Invalid subfolder.", "error", 100)
                return
            output_path.mkdir(parents=True, exist_ok=True)

            if is_cancelled():
                return

            send_progress(session_id, "📋 Fetching playlist information...", "processing", 30)

            # Get playlist videos
            def playlist_progress(message):
                send_progress(session_id, f"📋 {message}", "processing", 40)

            videos = downloader.get_playlist_videos(playlist_url, progress_callback=playlist_progress)

            if not videos:
                send_progress(session_id, "❌ No videos found in playlist.", "error", 100)
                return

            if is_cancelled():
                return

            send_progress(session_id, f"✅ Found {len(videos)} videos", "processing", 60)

            # Generate markdown content: one heading + embedded link per video
            send_progress(session_id, "📝 Generating markdown file...", "processing", 80)

            sections = []
            for video in videos:
                video_url = f"https://www.youtube.com/watch?v={video['id']}"
                sections.append(f"## {video['title']}\n![video]({video_url})\n\n")
            md_content = "".join(sections)

            # Save markdown file under a timestamped name
            timestamp = time.strftime('%Y%m%d_%H%M%S')
            filename = f"playlist_{timestamp}.md"
            md_file = output_path / filename
            md_file.write_text(md_content, encoding='utf-8')

            send_progress(session_id, f"✨ Markdown file created: {filename}", "success", 100)

        except Exception as e:
            # Top-level boundary of the worker thread: report instead of dying silently
            send_progress(session_id, f"❌ Error: {str(e)}", "error", 100)
        finally:
            # The job is over either way; stop tracking it for cancellation
            active_processes.pop(session_id, None)

    # Start processing in background
    thread = threading.Thread(target=process)
    thread.start()

    return jsonify({"session_id": session_id})

@app.route('/api/extract-transcripts', methods=['POST'])
def extract_transcripts():
    """Extract transcripts from a YouTube video or playlist.

    Expects a JSON object body with:
        youtube_url: video or playlist URL (required).
        subfolder: optional directory under ``outputs/transcripts``.
        format: output file extension (default ``'txt'``). ``'txt'`` writes
            plain text with a metadata header; any other value writes JSON.
        max_workers: parallel workers used for playlists (default 4).
        include_timestamps: passed to the transcription call and, when true,
            timestamped text / segments are written to the output.

    The work runs in a background thread; progress is published through
    ``send_progress`` under the returned session id.

    Returns:
        ``{"session_id": ...}`` once the worker thread has been started, or a
        JSON error with status 400 when the body is not a JSON object, the URL
        is missing, or ``format`` is not a plain alphanumeric token.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    youtube_url = data.get('youtube_url')
    subfolder = data.get('subfolder', None)
    format_type = data.get('format', 'txt')
    include_timestamps = data.get('include_timestamps', False)

    if not youtube_url:
        return jsonify({"error": "No YouTube URL provided"}), 400

    # format_type is spliced into the output filename as its extension, so it
    # must not contain separators or dots.
    if not isinstance(format_type, str) or not format_type.isalnum():
        return jsonify({"error": "Invalid format"}), 400

    # ThreadPoolExecutor requires a positive int; coerce and fall back to the
    # default rather than failing the whole job later.
    try:
        max_workers = max(1, int(data.get('max_workers', 4)))
    except (TypeError, ValueError):
        max_workers = 4

    session_id = create_progress_queue()
    active_processes[session_id] = {'cancelled': False}

    def is_cancelled():
        """Return True once /api/cancel has flagged this session."""
        return active_processes.get(session_id, {}).get('cancelled', False)

    def is_within(base, candidate):
        """Return True when candidate is base itself or lies beneath it."""
        base_abs = Path(os.path.abspath(base))
        candidate_abs = Path(os.path.abspath(candidate))
        return candidate_abs == base_abs or base_abs in candidate_abs.parents

    def process():
        try:
            if is_cancelled():
                return

            send_progress(session_id, "🔍 Detecting URL type...", "processing", 5)

            # Detect URL type
            url_type, identifier = detect_youtube_url_type(youtube_url)

            if url_type == 'invalid':
                send_progress(session_id, "❌ Invalid YouTube URL provided.", "error", 100)
                return

            send_progress(session_id, f"✅ URL Type: {url_type.upper()}", "processing", 10)

            downloader = YouTubeDownloader()
            transcription_service = TranscriptionService()

            # Set up output directory
            base_path = Path('outputs') / 'transcripts'
            output_path = base_path / subfolder if subfolder else base_path
            # subfolder is client input: refuse absolute paths / '..' that
            # would place files outside the output tree.
            if not is_within(base_path, output_path):
                send_progress(session_id, "❌ Invalid subfolder.", "error", 100)
                return
            output_path.mkdir(parents=True, exist_ok=True)

            if url_type == 'video':
                # Handle single video
                video_id = identifier
                send_progress(session_id, "🎥 Processing single video...", "processing", 20)

                if is_cancelled():
                    return

                with tempfile.TemporaryDirectory() as tmpdir:
                    # Download audio
                    send_progress(session_id, "🔽 Downloading audio from YouTube...", "processing", 30)

                    def download_progress(message):
                        send_progress(session_id, message, "processing", 40)

                    audio_path = downloader.download_audio(video_id, tmpdir, progress_callback=download_progress)

                    if is_cancelled():
                        return

                    # Transcribe audio
                    if include_timestamps:
                        send_progress(session_id, "🎤 Transcribing audio with timestamps...", "processing", 60)
                    else:
                        send_progress(session_id, "🎤 Transcribing audio...", "processing", 60)

                    def transcribe_progress(message):
                        send_progress(session_id, message, "processing", 80)

                    transcript_result = transcription_service.transcribe(
                        audio_path,
                        progress_callback=transcribe_progress,
                        include_timestamps=include_timestamps
                    )

                    # The result is handled both as a TranscriptionResult and
                    # as a plain value used directly as the transcript text.
                    if isinstance(transcript_result, TranscriptionResult):
                        transcript_text = transcript_result.text
                        transcript_with_ts = transcript_result.format_with_timestamps() if transcript_result.segments else transcript_text
                        duration = transcript_result.duration
                    else:
                        transcript_text = transcript_result
                        transcript_with_ts = transcript_result
                        duration = None

                    # Save transcript
                    file_timestamp = time.strftime('%Y%m%d_%H%M%S')
                    filename = f"video_{video_id}_{file_timestamp}.{format_type}"
                    transcript_file = output_path / filename

                    if format_type == 'txt':
                        video_url = f"https://www.youtube.com/watch?v={video_id}"
                        duration_str = f"\nDuration: {duration:.1f}s" if duration else ""
                        timestamps_str = "\nTimestamps: Yes" if include_timestamps else ""
                        metadata_header = f"""# Video Transcript
Video ID: {video_id}
URL: {video_url}
Transcribed: {time.strftime('%Y-%m-%d %H:%M:%S')}{duration_str}{timestamps_str}

{'='*60}

"""
                        output_text = transcript_with_ts if include_timestamps else transcript_text
                        transcript_file.write_text(metadata_header + output_text, encoding='utf-8')
                    else:
                        transcript_data = {
                            'video_id': video_id,
                            'transcript': transcript_text,
                            'transcribed_at': time.strftime('%Y-%m-%d %H:%M:%S')
                        }
                        if include_timestamps and isinstance(transcript_result, TranscriptionResult) and transcript_result.segments:
                            transcript_data['segments'] = [
                                {'start': seg.start, 'end': seg.end, 'text': seg.text}
                                for seg in transcript_result.segments
                            ]
                        if duration:
                            transcript_data['duration'] = duration
                        with open(transcript_file, 'w', encoding='utf-8') as f:
                            json.dump(transcript_data, f, indent=2, ensure_ascii=False)

                    send_progress(session_id, f"✅ Transcript saved: {filename}", "success", 100)

            else:
                # Handle playlist
                send_progress(session_id, "🎬 Processing playlist...", "processing", 20)

                def playlist_progress(message):
                    send_progress(session_id, f"📋 {message}", "processing", 30)

                video_ids = downloader.get_playlist_video_ids(youtube_url, progress_callback=playlist_progress)

                if not video_ids:
                    send_progress(session_id, "❌ No videos found in playlist.", "error", 100)
                    return

                send_progress(session_id, f"✅ Found {len(video_ids)} videos", "processing", 40)
                send_progress(session_id, f"⚡ Using {max_workers} parallel workers", "processing", 42)

                # Counters are only updated by this thread while draining
                # as_completed; the lock also serialises progress messages.
                successful = 0
                failed = 0
                stats_lock = threading.Lock()

                def process_video(video_data):
                    """Download, transcribe and save one playlist video.

                    Returns a dict whose 'status' is 'success', 'failed'
                    (with 'error') or 'cancelled'.
                    """
                    idx, video_id = video_data

                    # Check if cancelled
                    if is_cancelled():
                        return {'status': 'cancelled'}

                    with tempfile.TemporaryDirectory() as tmpdir:
                        try:
                            # Create individual instances for thread safety
                            video_downloader = YouTubeDownloader()
                            video_transcription = TranscriptionService()

                            # Download and transcribe
                            with stats_lock:
                                send_progress(session_id, f"🎥 [{idx}/{len(video_ids)}] Downloading video {video_id}", "processing")

                            audio_path = video_downloader.download_audio(video_id, tmpdir)

                            # Check if cancelled
                            if is_cancelled():
                                return {'status': 'cancelled'}

                            with stats_lock:
                                ts_msg = " with timestamps" if include_timestamps else ""
                                send_progress(session_id, f"🎤 [{idx}/{len(video_ids)}] Transcribing audio{ts_msg}", "processing")

                            transcript_result = video_transcription.transcribe(
                                audio_path,
                                include_timestamps=include_timestamps
                            )

                            # Handle TranscriptionResult vs string
                            if isinstance(transcript_result, TranscriptionResult):
                                transcript_text = transcript_result.text
                                transcript_with_ts = transcript_result.format_with_timestamps() if transcript_result.segments else transcript_text
                                duration = transcript_result.duration
                                segments = transcript_result.segments
                            else:
                                transcript_text = transcript_result
                                transcript_with_ts = transcript_result
                                duration = None
                                segments = None

                            # Save transcript; the index prefix keeps playlist order
                            filename = f"{idx:02d}_video_{video_id}.{format_type}"
                            transcript_file = output_path / filename

                            video_url = f"https://www.youtube.com/watch?v={video_id}"

                            if format_type == 'txt':
                                duration_str = f"\nDuration: {duration:.1f}s" if duration else ""
                                timestamps_str = "\nTimestamps: Yes" if include_timestamps else ""
                                metadata_header = f"""# Video Transcript
Video ID: {video_id}
URL: {video_url}{duration_str}{timestamps_str}

{'='*60}

"""
                                output_text = transcript_with_ts if include_timestamps else transcript_text
                                transcript_file.write_text(metadata_header + output_text, encoding='utf-8')
                            else:
                                transcript_data = {
                                    'video_id': video_id,
                                    'url': video_url,
                                    'transcript': transcript_text,
                                    'transcribed_at': time.strftime('%Y-%m-%d %H:%M:%S')
                                }
                                if include_timestamps and segments:
                                    transcript_data['segments'] = [
                                        {'start': seg.start, 'end': seg.end, 'text': seg.text}
                                        for seg in segments
                                    ]
                                if duration:
                                    transcript_data['duration'] = duration
                                with open(transcript_file, 'w', encoding='utf-8') as f:
                                    json.dump(transcript_data, f, indent=2, ensure_ascii=False)

                            return {'status': 'success', 'video_id': video_id}

                        except Exception as e:
                            # One bad video must not abort the rest of the playlist
                            return {'status': 'failed', 'video_id': video_id, 'error': str(e)}

                # Process videos in parallel
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = {executor.submit(process_video, (idx, vid)): (idx, vid)
                              for idx, vid in enumerate(video_ids, 1)}

                    for future in as_completed(futures):
                        # Check if cancelled
                        if is_cancelled():
                            # Cancel everything still queued; leaving the
                            # with-block then only waits for videos already
                            # in flight.
                            for pending in futures:
                                pending.cancel()
                            break

                        result = future.result()
                        idx, vid = futures[future]

                        with stats_lock:
                            if result['status'] == 'success':
                                successful += 1
                                percentage = 40 + ((successful + failed) / len(video_ids)) * 50
                                send_progress(session_id, f"✅ [{idx}/{len(video_ids)}] Completed: {result['video_id']}", "processing", percentage)
                            elif result['status'] == 'failed':
                                failed += 1
                                percentage = 40 + ((successful + failed) / len(video_ids)) * 50
                                send_progress(session_id, f"⚠️ [{idx}/{len(video_ids)}] Failed: {result.get('error', 'Unknown error')}", "warning", percentage)

                if not is_cancelled():
                    send_progress(session_id, f"✅ Completed: {successful} successful, {failed} failed", "success", 100)

        except Exception as e:
            # Top-level boundary of the worker thread: report instead of dying silently
            send_progress(session_id, f"❌ Error: {str(e)}", "error", 100)
        finally:
            # Clean up: the job is over either way; stop tracking it
            active_processes.pop(session_id, None)

    # Start processing in background
    thread = threading.Thread(target=process)
    thread.start()

    return jsonify({"session_id": session_id})

@app.route('/api/download-video', methods=['POST'])
def download_video():
    """Download a YouTube video (or every video of a playlist).

    Expects a JSON object body with:
        video_url: video or playlist URL (required).
        subfolder: optional directory under ``outputs/videos``; defaults to
            ``downloads``.
        quality: quality key passed to the downloader (default ``'best'``).
        start_time / end_time: optional clip bounds, single videos only.

    The work runs in a background thread; progress is published through
    ``send_progress`` under the returned session id.

    Returns:
        ``{"session_id": ...}`` once the worker thread has been started, or a
        JSON error with status 400 when the body is not a JSON object or
        ``video_url`` is missing.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    video_url = data.get('video_url')
    subfolder = data.get('subfolder', None)
    quality = data.get('quality', 'best')
    start_time = data.get('start_time', None)
    end_time = data.get('end_time', None)

    if not video_url:
        return jsonify({"error": "No video URL provided"}), 400

    session_id = create_progress_queue()
    active_processes[session_id] = {'cancelled': False}

    def is_cancelled():
        """Return True once /api/cancel has flagged this session."""
        return active_processes.get(session_id, {}).get('cancelled', False)

    def is_within(base, candidate):
        """Return True when candidate is base itself or lies beneath it."""
        base_abs = Path(os.path.abspath(base))
        candidate_abs = Path(os.path.abspath(candidate))
        return candidate_abs == base_abs or base_abs in candidate_abs.parents

    def process():
        try:
            if is_cancelled():
                return

            send_progress(session_id, "🔍 Analyzing YouTube URL...", "processing", 5)

            downloader = YouTubeDownloader()

            # Set up output directory
            base_path = Path('outputs') / 'videos'
            output_path = base_path / (subfolder if subfolder else 'downloads')
            # subfolder is client input: refuse absolute paths / '..' that
            # would place downloads outside the output tree.
            if not is_within(base_path, output_path):
                send_progress(session_id, "❌ Invalid subfolder.", "error", 100)
                return
            output_path.mkdir(parents=True, exist_ok=True)

            if is_cancelled():
                return

            # A URL that carries both list= and watch?v= is treated as a
            # single video, not as a playlist.
            is_playlist = 'list=' in video_url and 'watch?v=' not in video_url

            if is_playlist:
                send_progress(session_id, "🎬 Detected playlist - downloading all videos...", "processing", 10)

                # Download progress callback (muted once the job is cancelled)
                def download_progress(message):
                    if not is_cancelled():
                        send_progress(session_id, message, "processing", 50)

                # Use the playlist download method (no clip support for playlists)
                stats = downloader.download_playlist_videos(video_url, str(output_path), progress_callback=download_progress)

                if is_cancelled():
                    return

                # Success message with statistics
                summary = f"✅ Download Complete! Total: {stats['total']} | ✅ Success: {stats['successful']} | ⏭️ Skipped: {stats['skipped']}"
                if stats['failed'] > 0:
                    summary += f" | ⚠️ Failed: {stats['failed']}"
                send_progress(session_id, summary, "success", 100)

            else:
                # Single video with quality and timestamp options
                quality_labels = {
                    'best': 'Best Quality',
                    'high': '1080p',
                    'medium': '720p',
                    'low': '480p',
                    'audio_only': 'Audio Only'
                }
                quality_label = quality_labels.get(quality, quality)

                clip_info = ""
                if start_time or end_time:
                    clip_info = f" (Clip: {start_time or '0:00'} → {end_time or 'end'})"

                send_progress(session_id, f"🎥 Downloading video - {quality_label}{clip_info}...", "processing", 10)

                # Download progress callback (muted once the job is cancelled)
                def download_progress(message):
                    if not is_cancelled():
                        send_progress(session_id, f"📥 {message}", "processing", 50)

                # Empty strings from the client are normalised to None
                result_path = downloader.download_video_clip(
                    video_url=video_url,
                    output_dir=str(output_path),
                    quality=quality,
                    start_time=start_time or None,
                    end_time=end_time or None,
                    progress_callback=download_progress
                )

                if is_cancelled():
                    return

                # Report the real file name whether the downloader hands back
                # a Path or a plain string path; fall back only when it
                # returns nothing usable.
                if result_path and isinstance(result_path, (str, os.PathLike)):
                    filename = Path(result_path).name
                else:
                    filename = 'video.mp4'
                send_progress(session_id, f"✅ Downloaded: {filename}", "success", 100)

        except Exception as e:
            # Top-level boundary of the worker thread: report instead of dying silently
            send_progress(session_id, f"❌ Error: {str(e)}", "error", 100)
        finally:
            # The job is over either way; stop tracking it for cancellation
            active_processes.pop(session_id, None)

    # Start processing in background
    thread = threading.Thread(target=process)
    thread.start()

    return jsonify({"session_id": session_id})


@app.route('/api/video-info', methods=['POST'])
def get_video_info():
    """Return YouTube video information without downloading the video.

    Expects a JSON object body with ``video_url``.

    Returns:
        The downloader's info as JSON; a 400 JSON error when the body is not a
        JSON object or ``video_url`` is missing; a 500 JSON error carrying the
        exception text when the lookup fails.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so a malformed request gets the 400 below rather than a crash.
    data = request.get_json(silent=True)
    video_url = data.get('video_url') if isinstance(data, dict) else None

    if not video_url:
        return jsonify({"error": "No video URL provided"}), 400

    try:
        downloader = YouTubeDownloader()
        info = downloader.get_video_info(video_url)
        return jsonify(info)
    except Exception as e:
        # Request boundary: surface the failure to the client as JSON
        return jsonify({"error": str(e)}), 500

@app.route('/api/create-chapters', methods=['POST'])
def create_chapters():
    """Generate chapter markdown files from previously saved transcripts.

    Expects a JSON object body with:
        transcript_folder: directory under ``outputs/transcripts`` holding the
            ``*.txt`` transcripts (required).
        subfolder: optional directory under ``outputs/chapters``; defaults to
            ``transcript_folder``.
        language: language code passed to ``ChapterGenerator`` (default 'en').

    Transcripts whose ``<stem>_chapter.md`` already exists are skipped. The
    work runs in a background thread; progress is published through
    ``send_progress`` under the returned session id.

    Returns:
        ``{"session_id": ...}`` once the worker thread has been started, or a
        JSON error with status 400 when the body is not a JSON object or
        ``transcript_folder`` is missing.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    transcript_folder = data.get('transcript_folder')
    subfolder = data.get('subfolder', None)
    language = data.get('language', 'en')  # Default to English

    if not transcript_folder:
        return jsonify({"error": "No transcript folder provided"}), 400

    session_id = create_progress_queue()
    active_processes[session_id] = {'cancelled': False}

    # Language name mapping for display
    language_names = {
        'en': 'English', 'fr': 'French', 'es': 'Spanish', 'de': 'German',
        'it': 'Italian', 'pt': 'Portuguese', 'ru': 'Russian', 'ja': 'Japanese',
        'ko': 'Korean', 'zh-Hans': 'Chinese (Simplified)', 'zh-Hant': 'Chinese (Traditional)',
        'ar': 'Arabic', 'hi': 'Hindi', 'cs': 'Czech', 'nl': 'Dutch', 'pl': 'Polish',
        'tr': 'Turkish', 'vi': 'Vietnamese', 'id': 'Indonesian', 'fi': 'Finnish',
        'sv': 'Swedish', 'nb-NO': 'Norwegian', 'et': 'Estonian', 'fa': 'Persian',
        'rn': 'Kirundi', 'si': 'Sinhala', 'sw': 'Swahili', 'sr-Latn': 'Serbian'
    }
    # Unknown codes are shown as-is
    language_name = language_names.get(language, language)

    def is_cancelled():
        """Return True once /api/cancel has flagged this session."""
        return active_processes.get(session_id, {}).get('cancelled', False)

    def is_within(base, candidate):
        """Return True when candidate is base itself or lies beneath it."""
        base_abs = Path(os.path.abspath(base))
        candidate_abs = Path(os.path.abspath(candidate))
        return candidate_abs == base_abs or base_abs in candidate_abs.parents

    def process():
        try:
            if is_cancelled():
                return

            send_progress(session_id, f"📚 Starting chapter generation in {language_name}...", "processing", 10)

            # Find transcript files. transcript_folder is client input, so it
            # must stay inside the transcripts tree; anything else is reported
            # the same way as a folder that does not exist.
            transcripts_root = Path('outputs') / 'transcripts'
            transcripts_path = transcripts_root / transcript_folder
            if not is_within(transcripts_root, transcripts_path) or not transcripts_path.exists():
                send_progress(session_id, f"❌ Transcript folder not found: {transcript_folder}", "error", 100)
                return

            # Sorted so files are processed in a stable, name-based order
            txt_files = sorted(transcripts_path.glob('*.txt'))
            if not txt_files:
                send_progress(session_id, "❌ No transcript files found", "error", 100)
                return

            send_progress(session_id, f"📄 Found {len(txt_files)} transcript files", "processing", 20)

            # Set up output directory
            base_path = Path('outputs') / 'chapters'
            output_path = base_path / (subfolder if subfolder else transcript_folder)
            # Same confinement rule for the destination directory
            if not is_within(base_path, output_path):
                send_progress(session_id, "❌ Invalid subfolder.", "error", 100)
                return
            output_path.mkdir(parents=True, exist_ok=True)

            # Initialize chapter generator with language
            try:
                generator = ChapterGenerator(language=language)
                send_progress(session_id, f"✅ Chapter generator initialized ({language_name})", "processing", 30)
            except Exception as e:
                send_progress(session_id, f"❌ Error initializing generator: {str(e)}", "error", 100)
                return

            # Process each transcript; failures are counted so the final
            # message does not claim success for files that errored.
            failed = 0
            for idx, transcript_file in enumerate(txt_files, 1):
                if is_cancelled():
                    break

                percentage = 30 + (idx / len(txt_files)) * 60
                send_progress(session_id, f"📖 Processing: {transcript_file.name}", "processing", percentage)

                chapter_filename = transcript_file.stem + '_chapter.md'
                chapter_file = output_path / chapter_filename

                if chapter_file.exists():
                    send_progress(session_id, f"⏭️ Skipping existing: {chapter_filename}", "processing", percentage)
                    continue

                try:
                    generator.generate_chapter_from_file(
                        transcript_file=transcript_file,
                        output_file=chapter_file
                    )
                    send_progress(session_id, f"✅ Created: {chapter_filename}", "processing", percentage)
                except Exception as e:
                    # One bad transcript must not abort the remaining ones
                    failed += 1
                    send_progress(session_id, f"⚠️ Error with {transcript_file.name}: {str(e)}", "warning", percentage)

            if not is_cancelled():
                if failed:
                    # Status stays "success" so the job still ends at 100% as before;
                    # only the text reflects the failures.
                    send_progress(session_id, f"⚠️ Chapter generation finished with {failed} error(s)", "success", 100)
                else:
                    send_progress(session_id, "✅ All chapters generated successfully!", "success", 100)

        except Exception as e:
            # Top-level boundary of the worker thread: report instead of dying silently
            send_progress(session_id, f"❌ Error: {str(e)}", "error", 100)
        finally:
            # The job is over either way; stop tracking it for cancellation
            active_processes.pop(session_id, None)

    # Start processing in background
    thread = threading.Thread(target=process)
    thread.start()

    return jsonify({"session_id": session_id})

@app.route('/api/create-quiz', methods=['POST'])
def create_quiz():
    """Create quiz from chapters - supports both old and new workflow.

    The JSON request body selects the workflow:

    * New wizard workflow (body contains ``repository``): reads ``courses``,
      ``language``, ``chapters``, ``specific_chapters``, ``difficulty``,
      ``author`` and ``contributors`` and drives ``QuizWorkflowManager``.
    * Old workflow (no ``repository`` key): reads ``chapter_folder``,
      ``subfolder``, ``author`` and ``contributors`` and runs
      ``QuizGenerator`` over ``outputs/chapters/<chapter_folder>/*_chapter.md``,
      saving into ``outputs/quizz/<subfolder or chapter_folder>``.

    The work itself runs in a background thread that reports through
    ``send_progress`` and honours the ``cancelled`` flag stored in
    ``active_processes``; both workers remove their ``active_processes``
    entry when they finish.

    Returns:
        ``{"session_id": ...}`` once the worker thread has been started, or a
        400 JSON error when the request body is not a JSON object.
    """
    data = request.json
    if not isinstance(data, dict):
        # Without this guard the membership test below raises TypeError (HTTP 500).
        return jsonify({'error': 'JSON object body required'}), 400

    # Check if this is the new wizard workflow (has 'repository' field)
    if 'repository' in data:
        # New wizard workflow
        from course_components.quiz_workflow import QuizWorkflowManager

        repository = data.get('repository')
        courses = data.get('courses', [])
        language = data.get('language', 'en')
        chapters = data.get('chapters', 'all')
        specific_chapters = data.get('specific_chapters', [])  # Get the actual chapter IDs
        difficulty = data.get('difficulty', {'easy': 3, 'intermediate': 3, 'hard': 3})
        author = data.get('author', 'Unknown Author')
        contributors_str = data.get('contributors', '')

        session_id = create_progress_queue()
        active_processes[session_id] = {'cancelled': False}

        def process():
            try:
                if active_processes.get(session_id, {}).get('cancelled', False):
                    return

                send_progress(session_id, "🚀 Initializing quiz workflow manager...", "processing", 5)

                # Initialize workflow manager
                workflow_manager = QuizWorkflowManager()

                # Set author and contributors
                workflow_manager.author = author
                contributors = []
                if contributors_str:
                    contributors = [name.strip() for name in contributors_str.split(',') if name.strip()]
                    workflow_manager.contributors = contributors

                send_progress(session_id, f"📚 Processing {len(courses)} course(s)...", "processing", 10)

                # Total questions per chapter; also the denominator of the
                # difficulty proportions, so it must be strictly positive.
                questions_per_chapter = sum(difficulty.values())
                if questions_per_chapter <= 0:
                    send_progress(
                        session_id,
                        "❌ Error: difficulty settings must request at least one question",
                        "error",
                        100,
                    )
                    return

                # Missing difficulty levels count as zero questions.
                difficulty_proportions = {
                    level: difficulty.get(level, 0) / questions_per_chapter
                    for level in ('easy', 'intermediate', 'hard')
                }

                total_questions_generated = 0
                course_count = len(courses)

                # Process each course
                for course_idx, course in enumerate(courses):
                    # Every course owns an equal slice of the 10-90% range.
                    # In-course offsets are divided by the number of courses so
                    # the reported percentage never leaves that slice (and so
                    # never exceeds 100). With one course the values are the
                    # historical 15 / 20 / 30 / 90.
                    course_progress_base = 10 + (course_idx * 80 / course_count)
                    no_chapters_pct = course_progress_base + 5 / course_count
                    chapters_found_pct = course_progress_base + 10 / course_count
                    quiz_pct = course_progress_base + 20 / course_count
                    course_failed_pct = course_progress_base + 80 / course_count

                    if active_processes.get(session_id, {}).get('cancelled', False):
                        send_progress(session_id, "🛑 Process cancelled", "error", 100)
                        return

                    send_progress(session_id, f"📖 Processing course: {course}", "processing", course_progress_base)

                    # Get chapters for the course
                    course_chapters = workflow_manager.list_chapters(repository, course, language)

                    if not course_chapters:
                        send_progress(session_id, f"⚠️ No chapters found for {course}", "warning", no_chapters_pct)
                        continue

                    # Filter chapters if specific ones requested
                    if chapters == 'specific' and specific_chapters:
                        course_chapters = [ch for ch in course_chapters if ch['chapter_id'] in specific_chapters]

                    send_progress(session_id, f"📝 Found {len(course_chapters)} chapters in {course}", "processing", chapters_found_pct)

                    # Extract chapter IDs for quiz generation
                    chapter_ids = [ch['chapter_id'] for ch in course_chapters]

                    # Generate quiz using the workflow manager
                    try:
                        # Create progress callback that accepts 3 arguments.
                        # The percentage supplied by the workflow is ignored in
                        # favour of this course's position in the overall run.
                        def quiz_progress(message, status, percentage):
                            nonlocal total_questions_generated
                            if 'Generated' in message:
                                total_questions_generated += 1
                            send_progress(session_id, message, status, quiz_pct)

                        # Generate quiz for all chapters in this course; the
                        # yielded values are only used as cancellation points.
                        for _ in workflow_manager.generate_quiz(
                            repo_key=repository,
                            course_name=course,
                            chapter_ids=chapter_ids,
                            language=language,
                            question_count=questions_per_chapter,
                            difficulty_proportions=difficulty_proportions,
                            author=author,
                            contributors=contributors,
                            progress_callback=quiz_progress
                        ):
                            if active_processes.get(session_id, {}).get('cancelled', False):
                                send_progress(session_id, "🛑 Process cancelled", "error", 100)
                                return
                    except Exception as e:
                        # A failing course is reported but does not stop the remaining ones.
                        send_progress(session_id, f"⚠️ Error generating quiz for {course}: {str(e)}", "warning", course_failed_pct)

                send_progress(session_id, f"🎉 Quiz generation complete! Generated {total_questions_generated} questions", "success", 100)

            except Exception as e:
                send_progress(session_id, f"❌ Error: {str(e)}", "error", 100)
            finally:
                # Same cleanup as the old workflow: without it every wizard run
                # leaves its entry in active_processes forever.
                active_processes.pop(session_id, None)

    else:
        # Old workflow (backward compatibility)
        # NOTE(review): chapter_folder and subfolder come straight from the
        # request body and are joined into filesystem paths below without being
        # confined to outputs/ - consider validating them.
        chapter_folder = data.get('chapter_folder')
        subfolder = data.get('subfolder', None)
        author = data.get('author', 'Unknown Author')
        contributors_str = data.get('contributors', '')

        session_id = create_progress_queue()
        active_processes[session_id] = {'cancelled': False}

        def process():
            try:
                if active_processes.get(session_id, {}).get('cancelled', False):
                    return

                send_progress(session_id, "🧠 Starting quiz generation...", "processing", 10)

                # A missing or non-string folder cannot be joined into a path;
                # report it like any other unknown folder instead of surfacing
                # a TypeError message.
                if not isinstance(chapter_folder, str):
                    send_progress(session_id, f"❌ Chapter folder not found: {chapter_folder}", "error", 100)
                    return

                # Find chapter files
                chapters_path = Path('outputs') / 'chapters' / chapter_folder
                if not chapters_path.exists():
                    send_progress(session_id, f"❌ Chapter folder not found: {chapter_folder}", "error", 100)
                    return

                # Sort chapter files alphabetically to maintain order
                md_files = sorted(chapters_path.glob('*_chapter.md'), key=lambda x: x.name)
                if not md_files:
                    send_progress(session_id, "❌ No chapter files found", "error", 100)
                    return

                send_progress(session_id, f"📄 Found {len(md_files)} chapter files (processing in order)", "processing", 20)

                # Set up output directory
                base_path = Path('outputs') / 'quizz'
                output_path = base_path / (subfolder if subfolder else chapter_folder)
                output_path.mkdir(parents=True, exist_ok=True)

                # Initialize quiz generator
                try:
                    generator = QuizGenerator()
                    generator.author = author

                    # Parse contributors
                    if contributors_str:
                        generator.contributor_names = [name.strip() for name in contributors_str.split(',') if name.strip()]
                    else:
                        generator.contributor_names = []

                    send_progress(session_id, f"✅ Quiz generator initialized with author: {author}", "processing", 30)
                    if generator.contributor_names:
                        send_progress(session_id, f"📝 Contributors: {', '.join(generator.contributor_names)}", "processing", 32)
                except Exception as e:
                    send_progress(session_id, f"❌ Error initializing generator: {str(e)}", "error", 100)
                    return

                # Process each chapter
                for idx, chapter_file in enumerate(md_files, 1):
                    if active_processes.get(session_id, {}).get('cancelled', False):
                        break

                    # Chapters share the 30-90% range evenly.
                    percentage = 30 + (idx / len(md_files)) * 60
                    send_progress(session_id, f"🧠 Processing: {chapter_file.name}", "processing", percentage)

                    try:
                        # Generate quizzes (simplified - not interactive for web)
                        all_quizzes = generator.generate_quizzes_from_file(chapter_file)

                        # Save quizzes
                        generator.save_multiple_quizzes(all_quizzes, output_path, chapter_file.stem)

                        send_progress(session_id, f"✅ Created {len(all_quizzes)} quiz questions", "processing", percentage)
                    except Exception as e:
                        # One bad chapter is reported as a warning; the rest still run.
                        send_progress(session_id, f"⚠️ Error with {chapter_file.name}: {str(e)}", "warning", percentage)

                if not active_processes.get(session_id, {}).get('cancelled', False):
                    send_progress(session_id, "✅ All quizzes generated successfully!", "success", 100)

            except Exception as e:
                send_progress(session_id, f"❌ Error: {str(e)}", "error", 100)
            finally:
                active_processes.pop(session_id, None)

    # Start processing in background (shared by both workflows)
    thread = threading.Thread(target=process)
    thread.start()

    return jsonify({"session_id": session_id})

@app.route('/api/list-folders', methods=['GET'])
def list_folders():
    """List available folders in outputs directory.

    Query parameters:
        type: Location below ``outputs`` to inspect (default ``transcripts``).

    Returns:
        JSON ``{"folders": [...]}`` holding the sorted names of the directories
        directly inside ``outputs/<type>``. The list is empty when that
        location is not an existing directory or when ``type`` tries to leave
        the ``outputs`` tree.
    """
    folder_type = request.args.get('type', 'transcripts')

    # ``type`` is client-controlled. An anchored value (absolute path or drive)
    # would replace the ``outputs`` prefix when joined, and ``..`` segments
    # would climb out of it, so both are refused. The check is purely lexical,
    # so symlinked sub-folders inside ``outputs`` keep working.
    requested = Path(folder_type)
    if requested.anchor or '..' in requested.parts:
        return jsonify({"folders": []})

    base_path = Path('outputs') / requested
    # is_dir() (rather than exists()) also covers the case where the target is
    # a regular file, on which iterdir() would raise.
    if not base_path.is_dir():
        return jsonify({"folders": []})

    folders = [f.name for f in base_path.iterdir() if f.is_dir()]
    return jsonify({"folders": sorted(folders)})

@app.route('/api/quiz/repos', methods=['GET'])
def quiz_list_repos():
    """Report the quiz repositories returned by ``QuizWorkflowManager``.

    Returns:
        JSON with the repositories both keyed by repo key (``repositories``)
        and as a list (``repositoryList``), plus the ``total`` count and the
        number flagged ``valid``. Any exception becomes a 500 response of the
        form ``{"error": <message>}``.
    """
    try:
        manager = QuizWorkflowManager()

        # One JSON-ready record per repository; 'available' duplicates 'valid'
        # because the frontend reads that field name.
        repo_data = [
            {
                'key': repo.key,
                'name': repo.name,
                'path': str(repo.path),
                'configured': repo.configured,
                'exists': repo.exists,
                'valid': repo.valid,
                'available': repo.valid,
            }
            for repo in manager.list_repositories()
        ]

        # Same records, indexed by key for direct lookup on the client side.
        repo_dict = {info['key']: info for info in repo_data}
        valid_count = sum(1 for info in repo_data if info['valid'])

        payload = {
            'repositories': repo_dict,
            'repositoryList': repo_data,
            'total': len(repo_data),
            'valid': valid_count,
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/quiz/courses', methods=['GET'])
def quiz_list_courses():
    """Return the courses of one repository.

    Query parameters:
        repo_key: Repository to inspect (required; 400 when absent or empty).

    Returns:
        JSON with ``courses``, their ``total`` and the echoed ``repository``.
        Any exception raised while listing becomes a 500 ``{"error": ...}``.
    """
    repo_key = request.args.get('repo_key')
    if not repo_key:
        return jsonify({'error': 'repo_key parameter required'}), 400

    try:
        courses = QuizWorkflowManager().list_courses(repo_key)
        payload = {
            'courses': courses,
            'total': len(courses),
            'repository': repo_key,
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/quiz/chapters', methods=['GET'])
def quiz_list_chapters():
    """Return the chapters of one course.

    Query parameters:
        repo_key: Repository holding the course (required).
        course_name: Course to inspect (required).
        language: Language code, ``en`` when omitted.

    Returns:
        JSON with ``chapters``, their ``total`` and the echoed ``repository``,
        ``course`` and ``language``. A missing required parameter yields 400;
        any exception raised while listing yields 500 ``{"error": ...}``.
    """
    args = request.args
    repo_key = args.get('repo_key')
    course_name = args.get('course_name')
    language = args.get('language', 'en')

    # Both identifiers must be present and non-empty.
    if not (repo_key and course_name):
        return jsonify({'error': 'repo_key and course_name parameters required'}), 400

    try:
        manager = QuizWorkflowManager()
        chapters = manager.list_chapters(repo_key, course_name, language)
        payload = {
            'chapters': chapters,
            'total': len(chapters),
            'repository': repo_key,
            'course': course_name,
            'language': language,
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/quiz/languages', methods=['GET'])
def quiz_list_languages():
    """Return the languages available for one course.

    Query parameters:
        repo_key: Repository holding the course (required).
        course_name: Course to inspect (required).

    Returns:
        JSON with ``languages`` (a list of ``{"code", "name"}`` objects), their
        ``total`` and the echoed ``repository`` and ``course``. Codes without a
        known display name fall back to the upper-cased code. A missing
        required parameter yields 400; any exception yields 500
        ``{"error": ...}``.
    """
    repo_key = request.args.get('repo_key')
    course_name = request.args.get('course_name')

    # Both identifiers must be present and non-empty.
    if not (repo_key and course_name):
        return jsonify({'error': 'repo_key and course_name parameters required'}), 400

    # Display names for the language codes this endpoint knows about.
    language_names = {
        'en': 'English',
        'es': 'Spanish',
        'fr': 'French',
        'de': 'German',
        'it': 'Italian',
        'pt': 'Portuguese',
        'ru': 'Russian',
        'ja': 'Japanese',
        'ko': 'Korean',
        'zh': 'Chinese',
        'zh-Hans': 'Chinese (Simplified)',
        'zh-Hant': 'Chinese (Traditional)',
        'ar': 'Arabic',
        'hi': 'Hindi',
        'cs': 'Czech',
        'nl': 'Dutch',
        'pl': 'Polish',
        'sv': 'Swedish',
        'fi': 'Finnish',
        'et': 'Estonian',
        'id': 'Indonesian',
        'vi': 'Vietnamese',
        'fa': 'Persian',
        'sw': 'Swahili',
        'sr-Latn': 'Serbian (Latin)',
        'nb-NO': 'Norwegian',
        'rn': 'Kirundi',
    }

    try:
        manager = QuizWorkflowManager()
        language_codes = manager.list_languages(repo_key, course_name)

        # Pair every code with its display name (upper-cased code as fallback).
        languages = [
            {'code': code, 'name': language_names.get(code, code.upper())}
            for code in language_codes
        ]

        payload = {
            'languages': languages,
            'total': len(languages),
            'repository': repo_key,
            'course': course_name,
        }
        return jsonify(payload)
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/quiz/generate', methods=['POST'])
def quiz_generate():
    """Generate quiz questions with progress updates"""
    data = request.json

    # Validate required parameters
    required_fields = ['repo_key', 'course_name', 'chapter_ids']
    for field in required_fields:
        if field not in data:
            return jsonify({'error': f'{field} parameter required'}), 400

    repo_key = data['repo_key']
    course_name = data['course_name']
    chapter_ids = data['chapter_ids']