commit 4596034891c45c44cba4d71b426ced06277e8ed8
Author: vangef <vangef@outlook.com>
Date:   Sat Jan 7 15:56:18 2023 +0000

    initial commit release v1.0

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b6e4761
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/BB_gradebooks/README.md b/BB_gradebooks/README.md
new file mode 100644
index 0000000..2a8d258
--- /dev/null
+++ b/BB_gradebooks/README.md
@@ -0,0 +1,6 @@
+# BBGradebookOrganiser
+Blackboard Gradebook Organiser
+
+### Blackboard gradebooks directory: *BB_gradebooks*
+
+Inside this directory, create a sub-directory named after the gradebook / assignment, and extract the downloaded gradebook .zip file into it.
diff --git a/BB_submissions/README.md b/BB_submissions/README.md
new file mode 100644
index 0000000..d08a67b
--- /dev/null
+++ b/BB_submissions/README.md
@@ -0,0 +1,9 @@
+# BBGradebookOrganiser
+Blackboard Gradebook Organiser
+
+### Blackboard submissions directory: *BB_submissions*
+
+- Gradebooks from directory *BB_gradebooks* will be organised into this directory, in a subdirectory with the same name
+  - e.g. gradebook directory *AssignmentX* in *BB_gradebooks* will be organised into directory *AssignmentX* in *BB_submissions*
+- Also, a text file with all submission comments will be created in this directory, with the gradebook name as prefix
+  - e.g. *AssignmentX_comments.txt* will be created for gradebook *AssignmentX*
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..29c18c5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,49 @@
+# BBGradebookOrganiser
+Blackboard Gradebook Organiser
+
+## Description
+
+**Blackboard Gradebook Organiser** is a tool for organising a downloaded gradebook with assignment submissions from [Blackboard Learn](https://en.wikipedia.org/wiki/Blackboard_Learn).
+The submission files are organised per student, by extracting the student number from the submission file names and creating a directory per student. Any compressed files (.zip, .rar, .7z) are extracted into the student's directory, with any remaining files submitted individually also moved into the student's directory.
+
+### Features
+- Extracts, and organises per student, the content of submitted compressed files with extensions: .zip, .rar, .7z
+  - Detects invalid/corrupt files
+  - Doesn't extract macOS system generated files (ignores directory *__MACOSX* inside the compressed file)
+  - If a compressed file is found inside any submitted compressed files, it is organised into the student's folder and the script displays its location so it can be extracted manually
+- Deletes each compressed file after successful extraction into student directory
+- Organises per student any remaining individually submitted files
+- Checks and extracts any comments from the student submission generated text files
+- Checks if any compressed files (from the contents of the submitted compressed files) have been extracted and organised per student
+  - The path of any extracted and organised compressed files will be printed (they need to be extracted manually)
+
+## Instructions
+
+### Download gradebook
+- Go to the course page on Blackboard
+- Go to *Grade Centre -> Full Grade Centre*
+- Find assignment and click on the arrow for more options, and select *Assignment File Download*
+- Select all (click *Show All* at the bottom first, to display all users) and click submit to generate the gradebook zip file
+- Wait for the generated download link to appear, and click to download
+
+### Extract gradebook
+- Extract the downloaded gradebook in a new directory inside *BB_gradebooks*
+
+### Run script
+- Before running the script for the first time, install the required packages 
+  - `python -m pip install -r requirements.txt`
+- If running on Linux/Mac, you also need to install *unrar* in order to be able to extract .rar files
+  - `sudo apt install unrar` for Linux
+  - `brew install rar` for Mac
+- Provide the name of the directory (from section *Extract gradebook* above) as an argument when running the script
+  - `python organise_gradebook.py GRADEBOOK_DIR_NAME`
+- While running, the script displays on the terminal information and stats about the gradebook submissions and files
+
+### Post-run
+- All submission files can be found - organised in directories per student number - in directory *BB_submissions* under the sub-directory named after the gradebook name provided when running the script
+  - e.g. `python organise_gradebook.py GRADEBOOK_DIR_NAME` creates the directory *GRADEBOOK_DIR_NAME* inside *BB_submissions*
+- Each student directory contains the student's extracted and individually submitted files, and the text file generated by Blackboard with the submission (which also contains any comments left by the student)
+- All comments found in the gradebook are extracted in a text file in *BB_submissions*, with the gradebook name as prefix
+  - e.g. *AssignmentX_comments.txt* will be created for gradebook *AssignmentX*
+- Compressed files are deleted after successfully extracting and organising the contents
+  - any invalid/corrupt compressed files are moved into folder *\_\_BAD\_\_* inside the gradebook directory
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+
diff --git a/organise_gradebook.py b/organise_gradebook.py
new file mode 100644
index 0000000..d9b956f
--- /dev/null
+++ b/organise_gradebook.py
@@ -0,0 +1,18 @@
+import os, sys
+from utils.organiser import organise_gradebook, check_submissions_dir_for_compressed
+
+def main():
+    """Organise the gradebook directory named on the command line, then list any archives left to extract manually."""
+    if len(sys.argv) < 2:  # sys.exit() rather than the site-provided exit(), which is meant for the interactive shell
+        sys.exit(f'\nNo gradebook name given. Provide the name as an argument.\n\nUsage: python {sys.argv[0]} [gradebook dir name]\n')
+    gradebook_name = ' '.join(sys.argv[1:])  # re-join the arguments so an unquoted name containing spaces still works
+    gradebook_dir = os.path.join('BB_gradebooks', gradebook_name)  # gradebook from Blackboard with all submissions
+    submissions_dir = os.path.join('BB_submissions', gradebook_name)  # target dir for extracted submissions
+    print(f'\nGradebook directory to organise: {os.path.join(os.getcwd(), gradebook_dir)}')  # shown as an absolute path
+    organise_gradebook(gradebook_dir, submissions_dir)
+    check_submissions_dir_for_compressed(submissions_dir)
+
+
+if __name__ == '__main__':  # run only when executed as a script, not when imported
+    main()
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a5dba60
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+# py7zr==0.20.2
+# rarfile==4.0
+py7zr
+rarfile
diff --git a/utils/UnRAR.exe b/utils/UnRAR.exe
new file mode 100644
index 0000000..e62872f
Binary files /dev/null and b/utils/UnRAR.exe differ
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1 @@
+
diff --git a/utils/extractor.py b/utils/extractor.py
new file mode 100644
index 0000000..c9bf902
--- /dev/null
+++ b/utils/extractor.py
@@ -0,0 +1,75 @@
+import os, shutil, platform
+import zipfile, rarfile
+from py7zr import SevenZipFile, exceptions
+
+BAD_DIR_NAME = '__BAD__'  # name of the directory that invalid/corrupt compressed files are moved into
+
+def mark_file_as_BAD(file, bad_exception):
+    """Move a bad compressed file into BAD_DIR_NAME beside it and print a warning; failures are printed, not raised."""
+    try:
+        parent_dir, filename = os.path.split(file)  # same pair that dirname()/basename() would give
+        bad_file_path = os.path.join(parent_dir, BAD_DIR_NAME, filename)
+        os.makedirs(os.path.dirname(bad_file_path), exist_ok=True)  # create the BAD directory on first use
+        shutil.move(file, bad_file_path)
+        print(f'[Warning] Found BAD compressed file: {filename}\nMoved to: {bad_file_path}\nError message: {bad_exception}')
+    except Exception as e:  # best effort: report the problem instead of raising
+        print(f'[Error] {e}')
+
+
+def extract_zip(zip_file, target_dir):
+    """Extract zip_file into target_dir, skipping "__MACOSX" entries; archives that cannot be read are marked BAD."""
+    try:
+        with zipfile.ZipFile(zip_file, 'r') as zip_ref:  # the with-block closes the archive, no explicit close() needed
+            members = [ m for m in zip_ref.infolist() if "__MACOSX" not in m.filename ]
+            zip_ref.extractall(target_dir, members=members)  # extract all files, ignoring those with the "__MACOSX" string in the name
+    except (zipfile.BadZipFile, RuntimeError, NotImplementedError) as e:  # corrupt, password-protected, or unsupported compression
+        mark_file_as_BAD(zip_file, e)
+
+
+def extract_rar(rar_file, target_dir):
+    """Extract rar_file into target_dir, skipping "__MACOSX" entries.
+
+    Invalid archives are marked BAD; a missing unrar tool prints instructions and exits with status 1.
+    """
+    if platform.system() == 'Windows':  # the bundled UnRAR.exe sits next to this module, whatever the cwd is
+        rarfile.UNRAR_TOOL = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'UnRAR.exe')
+    else:  # if Linux or Mac
+        rarfile.UNRAR_TOOL = 'unrar'
+    try:  # the tool is configured above, before the archive is opened
+        with rarfile.RarFile(rar_file, 'r') as rar_ref:  # the with-block closes the archive
+            files = [ f for f in rar_ref.namelist() if "__MACOSX" not in f ]  # filter out files with "__MACOSX" in the name
+            rar_ref.extractall(target_dir, files)  # extract the remaining files
+    except (rarfile.BadRarFile, rarfile.NotRarFile) as e:
+        mark_file_as_BAD(rar_file, e)
+    except rarfile.RarCannotExec:
+        print('[Error] Missing unrar tool\nfor Windows: make sure file UnRAR.exe exists in directory \'utils\'\nfor Linux/Mac: need to install unrar (check README)')
+        raise SystemExit(1)  # fatal: report failure to the shell (the site helper exit() returned status 0)
+
+
+def extract_7z(seven_zip_file, target_dir):
+    """Extract seven_zip_file into target_dir with py7zr, skipping "__MACOSX" entries.
+
+    An archive with no entries, or any error while reading/extracting, marks the file as BAD.
+    """
+    try:
+        # the with-block closes the archive even on failure, so no handle is still open when the file is moved to BAD
+        with SevenZipFile(seven_zip_file, mode='r') as seven_zip:
+            names = seven_zip.getnames()
+            if not names:
+                raise exceptions.Bad7zFile
+            seven_zip.extract(target_dir, targets=[ n for n in names if "__MACOSX" not in n ])  # extract everything else
+    except Exception as e:  # includes exceptions.Bad7zFile: any failure is treated as a bad archive
+        mark_file_as_BAD(seven_zip_file, e)
+
+
+def extract_file_to_dir(file_path, student_dir):
+    """Extract a .zip/.rar/.7z file into student_dir (created if missing); any other type only prints an error."""
+    os.makedirs(student_dir, exist_ok=True)  # create the subdirectory for student
+    extractors = (('.zip', extract_zip), ('.rar', extract_rar), ('.7z', extract_7z))
+    lowered_path = file_path.lower()  # extensions are matched case-insensitively
+    for extension, extractor in extractors:
+        if lowered_path.endswith(extension):
+            extractor(file_path, student_dir)
+            break
+    else:  # no known extension matched
+        print(f"[Error] unknown file type: {file_path}")
diff --git a/utils/organiser.py b/utils/organiser.py
new file mode 100644
index 0000000..9527603
--- /dev/null
+++ b/utils/organiser.py
@@ -0,0 +1,124 @@
+import os, shutil, re
+from utils.extractor import extract_file_to_dir
+
+BAD_DIR_NAME = '__BAD__'  # gradebook entries whose name contains this are skipped when validating, counting and organising
+
+def validate_gradebook_dir_name(src_dir):
+    """Print the reason and exit unless src_dir is a directory containing something other than BAD_DIR_NAME."""
+    if not os.path.isdir(src_dir):  # check if it exists and is a directory
+        print(f"\n[Error] Incorrect directory: {src_dir}\n[Info] Make sure the directory exists in 'BB_gradebooks'")
+        raise SystemExit(1)  # a real error exits non-zero; the site helper exit() reported success here
+    entries = os.listdir(src_dir)  # read the directory once for both checks below
+    only_bad_msg = f'\n[Info] Gradebook has only invalid compressed files in: {os.path.join(src_dir, BAD_DIR_NAME)}\n[Info] Nothing to organise'
+    if not entries or entries == [BAD_DIR_NAME]:  # empty, or the only entry is the 'BAD' directory
+        print(only_bad_msg if entries else f'\n[Info] No files found in this gradebook - nothing to organise')
+        raise SystemExit  # nothing to organise is not an error: status 0, as before
+
+
+def get_comment_from_submission_txt(file_path):
+    """Return the student's comment from a submission text file.
+
+    Returns None for the standard "no student comments" text, '' when there is no "Comments:" heading,
+    otherwise the line following the last "Comments:" heading, exactly as written.
+    """
+    no_comment_text = 'Comments:\nThere are no student comments for this assignment.'
+    with open(file_path) as f:
+        file_contents = f.read()
+    if no_comment_text in file_contents:
+        return None
+    # capture only the text after the heading: str()-formatting the match list left a stray quote and dropped [ ] " typed by the student
+    comments = re.findall(r'Comments:\n(.*)', file_contents)
+    return comments[-1] if comments else ''
+
+
+def get_gradebook_stats(src_dir):
+    """Print statistics about the entries of src_dir and return the counts as a dict.
+
+    Keys: 'all', 'dirs', 'normal', one per tracked extension, 'tracked' and 'untracked'.
+    Entries whose name contains BAD_DIR_NAME are not counted.
+    """
+    tracked_file_extensions = [ '.zip', '.rar', '.7z', '.txt' ]  # add extension in list to track stats for more
+    entries = [ os.path.join(src_dir, name) for name in os.listdir(src_dir) if BAD_DIR_NAME not in name ]
+    dirs = [ path for path in entries if os.path.isdir(path) and BAD_DIR_NAME not in path ]
+    normal_files = [ path for path in entries if os.path.isfile(path) ]
+
+    files_counter = {'all': len(entries), 'dirs': len(dirs), 'normal': len(normal_files)}
+    for ext in tracked_file_extensions:  # extensions are matched case-insensitively
+        files_counter[ext] = sum(1 for path in normal_files if path.lower().endswith(ext))
+    files_counter['tracked'] = sum(files_counter[ext] for ext in tracked_file_extensions)
+    files_counter['untracked'] = files_counter['normal'] - files_counter['tracked']
+
+    # directories are only mentioned when at least one was found
+    dirs_msg = f'. Also found {len(dirs)} dir(s), wasn\'t expecting any!' if dirs else ''
+    tracked_msg = ', '.join(f'{files_counter[ext]} {ext}' for ext in tracked_file_extensions)
+    msg = f'\n[Stats] Gradebook contains {files_counter["all"]} file(s){dirs_msg}\n[Stats] Tracking {len(tracked_file_extensions)} file extension(s), files found: {tracked_msg}\n[Stats] Files with untracked extension: {files_counter["untracked"]}'
+    print(msg)
+    return files_counter
+
+
+def organise_file_per_student(src_dir, dest_dir, file_name, student_no):
+    """Extract a compressed file into, or move any other file into, the student's directory under dest_dir."""
+    student_dir = os.path.join(dest_dir, student_no)
+    os.makedirs(student_dir, exist_ok=True)  # create student directory if it doesn't exist
+    file_path = os.path.join(src_dir, file_name)
+    if not os.path.isfile(file_path):  # anything that is not a regular file is left where it is
+        return
+    lowered_path = file_path.lower()
+    if lowered_path.endswith(('.zip', '.rar', '.7z')):
+        extract_file_to_dir(file_path, student_dir)  # extract the file to student directory
+        if os.path.exists(file_path):  # still in place (a BAD file has been moved away): delete it after extraction
+            os.remove(file_path)
+        return
+    # a .txt file may carry a student comment: append it to '<dest_dir>_comments.txt' before moving the file
+    comment = get_comment_from_submission_txt(file_path) if lowered_path.endswith('.txt') else None
+    if comment:
+        with open(f'{dest_dir}_comments.txt', 'a') as f:
+            f.write(f'\nStudent number: {student_no} - File: {file_path}\nComment: {comment}\n')
+    new_file_path = os.path.join(student_dir, os.path.basename(file_name))
+    shutil.move(file_path, new_file_path)  # move the file to student directory
+
+
+def organise_gradebook(src_dir, dest_dir):
+    """Organise every gradebook file in src_dir into one directory per student number under dest_dir.
+    Compressed files (.zip, .rar, .7z) are extracted and deleted, all other files are moved, and comments
+    found in submission text files are collected in '<dest_dir>_comments.txt'.
+    """
+    validate_gradebook_dir_name(src_dir)  # check if dir exists, and has files in it - exits if not
+    os.makedirs(dest_dir, exist_ok=True)  # create the destination directory if it doesn't exist
+    files_counter = get_gradebook_stats(src_dir)  # print stats about the files in gradebook and get files_counter dict to use later
+    students_numbers = set()  # unique student numbers seen in the gradebook file names
+    print('\nStart organising...\n')
+    for file_name in os.listdir(src_dir):  # iterate through all files in the directory
+        if BAD_DIR_NAME not in file_name:  # ignore dir BAD_DIR_NAME (created after first run if corrupt compressed files found)
+            student_no = file_name.split('_attempt_')[0].split('_')[-1]  # get student number from file name !! pattern might need adjusting if file name format from blackboard changes !!
+            students_numbers.add(student_no)
+            organise_file_per_student(src_dir, dest_dir, file_name, student_no)
+
+    print(f'[Info] Submissions organised into directory: {os.path.join(os.getcwd(), dest_dir)}')
+    print(f'[Info] Unique student numbers in gradebook files: {len(students_numbers)}')
+    if files_counter['.txt'] == 0:
+        print(f'[Info] No submission text files found, file with comments not created')
+    elif os.path.isfile(f'{dest_dir}_comments.txt'):  # only point at the comments file when it actually exists
+        print(f'[Info] Comments in file: {dest_dir}_comments.txt')
+    else:  # text files were present but none of them produced a comment
+        print(f'[Info] No student comments found, file with comments not created')
+    print(f'[Note] Compressed files (.zip, .rar, .7z) are automatically deleted from the gradebook directory after successful extraction')
+
+    
+def check_submissions_dir_for_compressed(submissions_dir):
+    """Print every compressed file still present under submissions_dir.
+
+    Compressed files found inside submitted compressed files are not extracted recursively, so they are listed for manual extraction.
+    """
+    base_dir = os.getcwd()
+    compressed_suffixes = ('.zip', '.rar', '.7z')
+    compressed_files = [
+        os.path.join(base_dir, folder, fname)
+        for folder, _subdirs, fnames in os.walk(submissions_dir)
+        for fname in fnames if fname.lower().endswith(compressed_suffixes)
+    ]
+    if not compressed_files:  # nothing left to extract manually
+        return
+    print(f'\n[Warning] One or more compressed files from the gradebook contain compressed file(s) inside ({len(compressed_files)} found in total)')
+    print('\nSee below the organised per student compressed files, and extract them manually:\n')
+    print('\n'.join(compressed_files))