diff --git a/.gitignore b/.gitignore
index dd2c92d..2258fb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -177,3 +177,6 @@ temp/
 .envs/.local/.django
 start-dev.sh
 opencode.json
+
+*.log
+!metrics/tests/fixtures/*.log
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 100644
index 1f28439..0000000
--- a/AGENTS.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# AGENTS.md
-
-## Project
-
-Django 5.2 + Wagtail 7.3 + Celery app that ingests SciELO access logs, validates them, and exports COUNTER-5 metrics to OpenSearch with monthly indices and daily nested metrics.
-
-## Key commands
-
-All commands run inside Docker via the `local.yml` compose file unless noted.
-
-```bash
-make build                           # build images
-make up                              # start all services (django, postgres, redis, celery worker+beat, mailhog)
-make django_shell                    # Django shell via docker compose
-make django_test                     # run full test suite (pytest)
-make django_fast                     # tests with --failfast
-make django_migrate                  # apply migrations
-make django_makemigrations           # generate new migrations
-make django_createsuperuser          # create Wagtail admin user
-```
-
-**Run a single test file/path:**
-```bash
-docker compose -f local.yml run --rm django pytest path/to/test_file.py
-```
-
-**Without Docker** (rare): use `start-dev.sh` after adjusting the ethernet interface name.
-
-## Architecture
-
-- **Wagtail admin**: `http://localhost:8009/admin` (NOT Django admin at `/django-admin/`)
-- **Django apps** (top-level dirs): `core` (Wagtail pages, users, utilities, collectors), `collection`, `log_manager`, `log_manager_config`, `metrics`, `document`, `reports`, `resources`, `source`, `tracker`, `core_settings`
-- **`core/`** contains utilities, shared models, Wagtail hooks, templates, and the `collectors/` subpackage. `config/` is the Django project package (settings, urls, celery_app, wsgi).
-- **Celery pipeline**: `task_daily_log_ingestion_pipeline` (auto-scheduled) chains Search -> Validate -> Parse -> Export using Celery chords. Individual steps can be triggered manually via Wagtail admin.
-- **Task names** use translatable strings, e.g. `_[Log Pipeline] 1. Search Logs (Manual)` — do not rename these casually, it breaks the schedule.
-
-## Settings
-
-- `DJANGO_SETTINGS_MODULE` defaults to `config.settings.local`
-- Tests use `config.settings.test` (set via `pytest.ini` `--ds=config.settings.test`)
-- Env files live in `.envs/.local/` (local) and `.envs/.production/` (production)
-- **`config/settings/test.py`** is minimal — it extends `base.py` and does NOT load local.py. If a test needs a setting that only exists in local.py, it must be added to test.py or set in the test directly.
-
-## Testing
-
-- Framework: **pytest** (not Django's `TestCase` runner), with `--reuse-db` by default
-- Config: `pytest.ini` sets `--ds=config.settings.test --reuse-db`
-- Both `unittest.TestCase` (Django-style) and pytest-style tests coexist; `pytest` is the runner
-- CI runs: `build -> makemigrations -> migrate -> pytest`
-- Shared fixtures in `core/conftest.py` (autouse `media_storage`, `user` fixture via factory-boy)
-
-## Linting & formatting
-
-- **black** (line length 120 implied by flake8 config; black defaults to 88 — pre-commit config pins it)
-- **isort** (black profile via `line_length=88`)
-- **flake8** (max-line-length=120 via setup.cfg)
-- Pre-commit runs all three on commit. Configuration in `setup.cfg` (flake8, isort, mypy) and `.pre-commit-config.yaml`.
-
-## Local dev quirks
-
-- Two SciELO libs (`scielo_log_validator`, `scielo_usage_counter`) are installed from local repos mounted at `/app/scielo_log_validator` and `/app/scielo_usage_counter` when `USE_LOCAL_SCIELO_LIBS=1`. The local Dockerfile strips these from `base.txt` during build and installs them from the mounted volumes via the entrypoint script.
-- Log files volume: `/mnt/pidata2/pi/scl/logs:/app/logs` (host-specific, may not exist on all machines)
-- Mailhog UI at `http://localhost:8029`
-- `manage.py` appends `core/` to `sys.path` so `from core.utils import ...` and `from utils import ...` both resolve.
-
-## OpenSearch
-
-- Client configured via `OPENSEARCH_URL`, `OPENSEARCH_BASIC_AUTH`, `OPENSEARCH_VERIFY_CERTS`
-- Index naming: `usage_monthly_{collection}_{year}` (e.g. `usage_monthly_books_2026`)
-- Upserts use Painless scripts for idempotent daily metric merging
-- `OPENSEARCH_INDEX_NAME` (default `usage`) and `OPENSEARCH_API_KEY` are defined in base settings but not widely used
-
-## MCP tools
-
-- When you need to search framework/library docs (Django, Wagtail, Celery, OpenSearch, etc.), use `context7` tools.
-- When you need to find code examples or patterns from open-source projects, use `gh_grep` tools.
-
-## Wagtail-specific notes
-
-- Multi-language: `pt-br` (default), `en`, `es`
-- Wagtail URL prefixes disabled (`prefix_default_language=False`)
-- After adding a language, run `make wagtail_sync` and `make wagtail_update_translation_field`
-- `wagtail-modeladmin` is used for managing pipeline entities in admin
diff --git a/Makefile b/Makefile
index 978625e..acf5129 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,9 @@
-default: build
+default: help
 
 COMPOSE_FILE_DEV = local.yml
 
 compose = ${COMPOSE_FILE_DEV}
 
-export SCIELO_USAGE_BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
-export SCIELO_USAGE_VCS_REF=$(strip $(shell git rev-parse --short HEAD))
 export SCIELO_USAGE_WEBAPP_VERSION=$(strip $(shell cat VERSION))
 
 help: ## Show this help
@@ -18,23 +16,12 @@ help: ## Show this help
 	@egrep '^(.+)\:\ .*##\ (.+)' ${MAKEFILE_LIST} | sed 's/:.*##/#/' | column -t -c 1 -s "#"
 	@echo ''
 	@echo 'Example:'
-	@echo "\t Type 'make' (default target=build) is the same of type 'make build compose=local.yml'"
 	@echo "\t Type 'make build' is the same of type 'make build compose=local.yml'"
 	@echo "\t Type 'make up' is the same of type 'make up compose=local.yml'"
 
 app_version: ## Show version of webapp
 	@echo "Version: " $(SCIELO_USAGE_WEBAPP_VERSION)
 
-latest_commit:  ## Show last commit ref
-	@echo "Latest commit: " $(SCIELO_USAGE_VCS_REF)
-
-build_date: ## Show build date
-	@echo "Build date: " $(SCIELO_USAGE_BUILD_DATE)
-
-############################################
-## atalhos docker compose desenvolvimento ##
-############################################
-
 build:  ## Build app using $(compose)
 	@docker compose -f $(compose) build
 
@@ -50,80 +37,54 @@ logs: ## See all app logs using $(compose)
 stop:  ## Stop all app using $(compose)
 	@docker compose -f $(compose) stop
 
-restart:
+restart: ## Restart app using $(compose)
 	@docker compose -f $(compose) restart
-	
+
 ps:  ## See all containers using $(compose)
 	@docker compose -f $(compose) ps
 
-rm:  ## Remove all containers using $(compose)
-	@docker compose -f $(compose) rm -f
+django_bash: ## Open a bash terminal from django container using $(compose)
+	@docker compose -f $(compose) run --rm django bash
 
 django_shell:  ## Open python terminal from django $(compose)
 	@docker compose -f $(compose) run --rm django python manage.py shell
 
-wagtail_sync: ## Wagtail sync Page fields (repeat every time you add a new language and to update the wagtailcore_page translations) $(compose)
-	@docker compose -f $(compose) run --rm django python manage.py sync_page_translation_fields
-
-wagtail_update_translation_field: ## Wagtail update translation fields, user this command first $(compose)
-	@docker compose -f $(compose) run --rm django python manage.py update_translation_fields
-
 django_createsuperuser: ## Create a super user from django $(compose)
 	@docker compose -f $(compose) run --rm django python manage.py createsuperuser
 
-django_bash: ## Open a bash terminar from django container using $(compose)
-	@docker compose -f $(compose) run --rm django bash
-
-django_test: ## Run tests from django container using $(compose)
-	@docker compose -f $(compose) run --rm django pytest
-
-django_fast: ## Run tests fast from django container using $(compose)
-	@docker compose -f $(compose) run --rm django pytest --failfast
+django_migrate: ## Run migrate from django container using $(compose)
+	@docker compose -f $(compose) run --rm django python manage.py migrate
 
 django_makemigrations: ## Run makemigrations from django container using $(compose)
 	@docker compose -f $(compose) run --rm django python manage.py makemigrations
 
-django_migrate: ## Run migrate from django container using $(compose)
-	@docker compose -f $(compose) run --rm django python manage.py migrate
-
 django_makemessages: ## Run ./manage.py makemessages $(compose)
 	@docker compose -f $(compose) run --rm django python manage.py makemessages --all
 
 django_compilemessages: ## Run ./manage.py compilemessages $(compose)
 	@docker compose -f $(compose) run --rm django python manage.py compilemessages
 
-django_dump_auth: ## Run manage.py dumpdata auth --indent=2 $(compose)
-	@docker compose -f $(compose) run --rm django python manage.py dumpdata auth --indent=2  --output=fixtures/auth.json
-
-django_load_auth: ## Run manage.py dumpdata auth --indent=2 $(compose)
-	@docker compose -f $(compose) run --rm django python manage.py loaddata --database=default fixtures/auth.json
-
-dump_data: ## Dump database into .sql $(compose)
-	@docker compose -f $(compose) exec -T postgres sh -c 'pg_dumpall -c -U "$$POSTGRES_USER"' > dump_`date +%d-%m-%Y"_"%H_%M_%S`.sql
-
-restore_data: ## Restore database into from latest.sql file $(compose)
-	@docker compose -f $(compose) exec -T postgres sh -c 'psql -U "$$POSTGRES_USER"' < backup/latest.sql
+wagtail_update_translation_field: ## Wagtail update translation fields, use this command first $(compose)
+	@docker compose -f $(compose) run --rm django python manage.py update_translation_fields
 
-############################################
-## Atalhos Úteis                          ##
-############################################
+wagtail_sync: ## Wagtail sync Page fields (repeat every time you add a new language and to update the wagtailcore_page translations) $(compose)
+	@docker compose -f $(compose) run --rm django python manage.py sync_page_translation_fields
 
-clean_container:  ## Remove all containers
-	@docker compose -f $(compose) rm -sf
+test: ## Alias for django_test using $(compose)
+	@docker compose -f $(compose) run --rm django pytest
 
-clean_dangling_images:  ## Remove all dangling images
-	@docker rmi -f $$(docker images --filter 'dangling=true' -q --no-trunc)
+django_test: ## Run tests from django container using $(compose)
+	@docker compose -f $(compose) run --rm django pytest
 
-clean_dangling_volumes:  ## Remove all dangling volumes
-	@docker volume rm $$(docker volume ls -f dangling=true -q)
+django_fast: ## Run tests fast from django container using $(compose)
+	@docker compose -f $(compose) run --rm django pytest --failfast
 
-clean_project_images:  ## Remove all images with "scielo_usage" on name
-	@docker rmi -f $$(docker images --filter=reference='*scielo_usage*' -q)
+lint: ## Run flake8 using $(compose)
+	@docker compose -f $(compose) run --rm django flake8
 
-volume_down:  ## Remove all volume
-	@docker compose -f $(compose) down -v
+format_check: ## Run black and isort checks using $(compose)
+	@docker compose -f $(compose) run --rm django black --check .
+	@docker compose -f $(compose) run --rm django isort --check-only .
 
-clean_migrations: ## Remove generated migration bytecode only
-	@echo "Cleaning migration bytecode..."
-	@find . -path "*/migrations/*.pyc" -delete
-	@echo "Migration bytecode cleaned successfully."
+precommit: ## Run pre-commit hooks using $(compose)
+	@docker compose -f $(compose) run --rm django pre-commit run --all-files
diff --git a/README.md b/README.md
index 2433fa8..f57d8b6 100644
--- a/README.md
+++ b/README.md
@@ -1,162 +1,196 @@
-# SciELO Usage Metrics Pipeline
+# SciELO Usage
 
-A modernized platform for processing and indexing SciELO usage logs into OpenSearch, adhering to COUNTER R5.1 standards.
+[![CI](https://github.com/scieloorg/usage/actions/workflows/ci.yml/badge.svg)](https://github.com/scieloorg/usage/actions/workflows/ci.yml)
+![Python](https://img.shields.io/badge/python-3.11-blue)
+![Django](https://img.shields.io/badge/django-5.2-green)
+![Wagtail](https://img.shields.io/badge/wagtail-7.3-teal)
 
-## Quick Start (Dev Installation)
+Application for processing SciELO access logs, extracting COUNTER R5.1 metrics, and exporting monthly/yearly usage documents to OpenSearch.
 
-To build and run the application locally:
+## Quick Start
 
-1. `make build compose=local.yml`
-2. `make django_migrate`
-3. `make django_createsuperuser`
-4. `make up`
+Local development runs with Docker Compose using `local.yml`.
 
-The application will be accessible at [http://localhost:8009/admin](http://localhost:8009/admin).
+```bash
+make build
+make django_migrate
+make django_createsuperuser
+make up
+```
+
+Admin: http://localhost:8009/admin
+
+Main local services:
 
----
+| Service | Port |
+|---|---:|
+| Django/Wagtail | 8009 |
+| PostgreSQL | 5439 |
+| Redis | 6399 |
+| Mailhog | 8029 |
 
-## Key Commands
+## Full Pipeline Setup
 
-All commands run inside Docker via the `local.yml` compose file unless noted.
+After the app is running, open a Django shell:
 
 ```bash
-make build                           # build images
-make up                              # start all services (django, postgres, redis, celery worker+beat, mailhog)
-make django_shell                    # Django shell via docker compose
-make django_test                     # run full test suite (pytest)
-make django_fast                     # tests with --failfast
-make django_migrate                  # apply migrations
-make django_makemigrations           # generate new migrations
-make django_createsuperuser          # create Wagtail admin user
-make logs                            # follow all service logs
-make ps                              # list compose services
-make django_bash                     # open a bash shell in the django container
-make django_compilemessages          # compile translation files
+make django_shell
 ```
 
-**Run a single test file/path:**
+Seed the base data and resources:
+
+```python
+from collection.tasks import task_load_collections
+from log_manager_config.tasks import task_load_log_manager_collection_settings
+from resources.tasks import task_load_geoip, task_load_robots
+
+log_config = [
+    {
+        "acronym": "scl",
+        "directory_name": "SciELO Brasil",
+        "path": "/app/logs/scielo.br",
+        "quantity": 1,
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "opac",
+    }
+]
+
+task_load_collections.delay()
+task_load_log_manager_collection_settings.delay(data=log_config)
+task_load_robots.delay()
+task_load_geoip.delay()
+```
+
+Load sources and documents before processing logs. For a first run, restrict document synchronization to a smaller date range:
+
+```python
+from document.tasks import (
+    task_load_dataset_metadata_into_documents,
+    task_load_documents_from_article_meta,
+    task_load_documents_from_opac,
+    task_load_preprints_into_documents,
+    task_sync_documents_from_scielo_books,
+)
+from source.tasks import (
+    task_load_sources_from_article_meta,
+    task_load_sources_from_scielo_books,
+)
+
+task_load_sources_from_article_meta.delay(collections=["scl"])
+task_load_sources_from_scielo_books.delay(limit=1000)
+
+date_range = {"from_date": "2025-01-01", "until_date": "2025-12-31"}
+task_load_documents_from_article_meta.delay(**date_range)
+task_load_documents_from_opac.delay(collection="scl", **date_range)
+task_load_preprints_into_documents.delay(**date_range)
+task_load_dataset_metadata_into_documents.delay(**date_range)
+task_sync_documents_from_scielo_books.delay()
+```
+
+Before starting the log pipeline, confirm in the admin that each collection has an active Log Manager configuration pointing to a readable log directory mounted in the container.
+
+For the example above, place a log file under the configured directory:
+
 ```bash
-docker compose -f local.yml run --rm django pytest path/to/test_file.py
+mkdir -p <mounted-logs-dir>/scielo.br
+cp metrics/tests/fixtures/usage.log <mounted-logs-dir>/scielo.br/usage-2021-05-21.log
 ```
 
-## Architecture & Data Pipeline
+Run the full Search -> Validate -> Parse -> Export chain for a date range:
 
-### Apps
+```python
+from log_manager.tasks import task_search_log_files
 
-| App | Purpose |
-|---|---|
-| `log_manager` | Log file discovery, validation, and status tracking |
-| `log_manager_config` | Collection-specific configuration (paths, emails, expected logs/day) |
-| `metrics` | Daily metric jobs, OpenSearch export, COUNTER R5.1 aggregation |
-| `document` | Unified metadata model for articles, books, chapters, datasets, and preprints |
-| `source` | Journal, book, preprint server, and data repository metadata |
-| `reports` | Weekly, monthly, and yearly log processing reports |
-| `resources` | Robot user-agent patterns and GeoIP MMDB management |
-| `tracker` | Discarded line tracking and error logging |
-| `core` | Wagtail pages, users, shared utilities, and external API collectors |
-| `collection` | SciELO collection management |
+task_search_log_files.delay(
+    collections=["scl"],
+    from_date="2021-05-21",
+    until_date="2021-05-21",
+    trigger_validation=True,
+)
+```
 
-### Core Collectors (`core/collectors/`)
+Monitor execution with:
 
-| Collector | Source |
-|---|---|
-| `articlemeta.py` | ArticleMeta REST/Thrift API |
-| `opac.py` | SciELO OPAC endpoint |
-| `preprints.py` | SciELO Preprints OAI-PMH |
-| `dataverse.py` | SciELO Data (Dataverse) |
-| `scielo_books.py` | SciELO Books CouchDB changes feed |
+```bash
+make logs
+```
 
-### Log Ingestion Pipeline
+## Commands
 
-The ingestion is fully automated via the **`[Log Pipeline] Daily Routine (Auto)`** task. It follows a strictly ordered sequence using Celery Chords:
+```bash
+make help                    # list available targets
+make app_version             # show VERSION
+make build                   # build local images
+make build_no_cache          # build local images without cache
+make up                      # start local services
+make logs                    # follow service logs
+make stop                    # stop local services
+make restart                 # restart local services
+make ps                      # list running services
+make django_bash             # open bash in the django container
+make django_shell            # open Django shell
+make django_createsuperuser  # create an admin user
+make django_migrate          # apply migrations
+make django_makemigrations   # create migrations
+make django_makemessages     # update translation messages
+make django_compilemessages  # compile translation messages
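+make wagtail_update_translation_field  # update translation fields (run this before wagtail_sync)
+make wagtail_sync            # sync Page translation fields (rerun after adding a language)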
+make test                    # run pytest (alias for django_test)
+make django_test             # run pytest
+make django_fast             # run pytest --failfast
+make lint                    # run flake8
+make format_check            # run black/isort checks
+make precommit               # run pre-commit hooks
+```
+
+Use `compose=production.yml` or another Compose file when needed:
+
+```bash
+make ps compose=production.yml
+```
 
-- **Search**: Scans configured directories for new `.log` or `.gz` files.
-- **Validate**: Performs statistical sampling to ensure log integrity and detect the usage date.
-- **Parse**: Extracts metrics using `scielo_usage_counter`, performs URL translation, and aggregates data.
-- **Export**: Pushes results to OpenSearch using idempotent upsert scripts.
+Run one test path:
 
-### Metadata Synchronization
+```bash
+docker compose -f local.yml run --rm django pytest metrics/tests/test_opensearch.py
+```
 
-Metadata is kept in sync with SciELO sources (ArticleMeta, OPAC, Books, etc.) via the **`[Metadata] Daily Sync Routine (Auto)`** task, which runs parallel workers to ensure documents and sources are always up to date.
+## Pipeline
 
-## Supported Log Formats
+The log pipeline is coordinated by Celery tasks:
 
-| Format | Description |
-|---|---|
-| NCSA Extended | Standard Apache combined log format with optional domain prefix and IP list fields. |
-| BunnyCDN | Pipe-delimited format with Unix timestamps (7 or 10 digits), country codes, and request IDs. |
+1. Search configured directories for new `.log` and `.gz` files.
+2. Validate log samples and detect usage date.
+3. Parse requests with `scielo_usage_counter`.
+4. Aggregate COUNTER R5.1 metrics.
+5. Export idempotent monthly/yearly documents to OpenSearch.
 
-## Environment Variables
+Metadata synchronization keeps sources and documents updated from ArticleMeta, OPAC, SciELO Books, SciELO Preprints, and SciELO Data.
 
-Runtime configuration is loaded from `.envs/.local/` or `.envs/.production/` through the Compose files.
+## Periodic Tasks
 
-### Core Services
+Configure the default schedule manually in Wagtail/Admin through `django-celery-beat`
+`PeriodicTask` records. Exact cron times may vary by installation, but the default
+operational setup should include:
 
-| Variable | Default | Description |
+| Task | Suggested schedule | Notes |
 |---|---|---|
-| `OPENSEARCH_URL` | `http://localhost:9200/` | OpenSearch cluster URL |
-| `OPENSEARCH_INDEX_NAME` | `usage` | OpenSearch index prefix |
-| `OPENSEARCH_BASIC_AUTH` | `admin:admin` | OpenSearch basic auth credentials |
-| `OPENSEARCH_VERIFY_CERTS` | `False` | Verify SSL certificates for OpenSearch connections |
-| `COUNTER_ROBOTS_URL` | `https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.json` | COUNTER robot user-agent list URL used by the resources loader |
-| `MMDB_URL_TEMPLATE` | `https://download.db-ip.com/free/dbip-country-lite-{year}-{month:02d}.mmdb.gz` | DB-IP GeoIP MMDB gzip URL template; `{year}` and `{month}` are filled from the current and previous month |
-| `USE_LOCAL_SCIELO_LIBS` | `0` | Mount local `scielo_log_validator` and `scielo_usage_counter` repos for development |
-| `DJANGO_SETTINGS_MODULE` | `config.settings.local` | Django settings module |
-| `REDIS_URL` | — | Redis connection URL for Celery |
-
-### Collector Endpoints
-
-| Variable | Default | Description |
+| `[Metadata] Daily Sync Routine (Auto)` | Daily, early morning | Refreshes sources and documents before log processing. Use the `load` queue. |
+| `[Log Pipeline] Daily Routine (Auto)` | Daily, after metadata sync | Runs Search -> Validate -> Parse -> Export for new logs. Use the `load` queue. |
+| `[Metrics] Resume Log Exports` | Every 15-30 minutes | Retries errored or stale daily metric export jobs. |
+| `[Metrics] Resume Stale Parsing Logs` | Every 30-60 minutes | Marks stale `PAR` logs for retry. |
+| `[Metrics] Cleanup Daily Payloads` | Daily or weekly | Removes old exported daily payload files. |
+| `[Reports] Populate All Reports` | Daily, after log processing | Refreshes weekly, monthly, and yearly log report tables. |
+
+Optional operational tasks:
+
+| Task | Suggested schedule | Notes |
 |---|---|---|
-| `ARTICLEMETA_COLLECT_URL` | `http://articlemeta.scielo.org/api/v1/article/counter_dict` | ArticleMeta counter metadata endpoint |
-| `ARTICLEMETA_MAX_RETRIES` | `5` | ArticleMeta retry attempts |
-| `ARTICLEMETA_SLEEP_TIME` | `30` | Delay between ArticleMeta retries, in seconds |
-| `OPAC_ENDPOINT` | `https://www.scielo.br/api/v1/counter_dict` | OPAC counter metadata endpoint |
-| `OPAC_MAX_RETRIES` | `5` | OPAC retry attempts |
-| `OPAC_SLEEP_TIME` | `30` | Delay between OPAC retries, in seconds |
-| `OAI_PMH_PREPRINT_ENDPOINT` | `https://preprints.scielo.org/index.php/scielo/oai` | SciELO Preprints OAI-PMH endpoint |
-| `OAI_METADATA_PREFIX` | `oai_dc` | OAI-PMH metadata prefix |
-| `OAI_PMH_MAX_RETRIES` | `5` | OAI-PMH retry attempts |
-| `DATAVERSE_ENDPOINT` | `https://data.scielo.org/api` | SciELO Data Dataverse API endpoint |
-| `DATAVERSE_ROOT_COLLECTION` | `scielodata` | Dataverse root collection alias |
-| `DATAVERSE_SLEEP_TIME` | `30` | Dataverse request timeout/retry delay, in seconds |
-| `SCIELO_BOOKS_BASE_URL` | `http://localhost:5984` | SciELO Books CouchDB base URL |
-| `SCIELO_BOOKS_DB_NAME` | `scielobooks_1a` | SciELO Books CouchDB database name |
-| `SCIELO_BOOKS_TIMEOUT` | `60` | SciELO Books request timeout, in seconds |
-| `SCIELO_BOOKS_LIMIT` | `1000` | SciELO Books changes-feed page size |
-
-## OpenSearch Storage Strategy
-
-The OpenSearch export keeps monthly usage documents with nested daily metrics, while index names depend on collection size:
-
-- **Large and xlarge collections**: annual indices, such as `usage_monthly_scl_2024` and `usage_yearly_scl_2024`.
-- **Small collections**: stable collection indices, such as `usage_monthly_books` and `usage_yearly_books`.
-- **One Document per Month**: Each document/PID has one monthly document per metric scope.
-- **Daily Nested Metrics**: Daily granularity is preserved inside each monthly document using a `daily_metrics` object.
-- **Atomic Upserts**: Data is merged using OpenSearch **Painless Scripts**, allowing multiple logs for the same day/month to be processed without data duplication or loss.
-
-## Management & Monitoring
-
-All pipelines can be monitored through the **Wagtail Admin**:
-
-- **Log Manager**: Monitor the status of individual log files (`QUEUED`, `PARSING`, `PROCESSED`).
-- **Daily Metric Jobs**: Track the history of daily processing and OpenSearch export attempts.
-- **Log Config**: Manage collection-specific settings, log paths, and notification emails.
-
-Internally, log file statuses are stored as short codes such as `QUE`, `PAR`, and `PRO`, with labels displayed in the admin.
-
-### Useful Commands
-
-- `make django_shell`: Access the Django interactive shell.
-- `make django_bash`: Open a bash shell in the Django container.
-- `make logs`: Follow Docker Compose logs.
-- `make ps`: Show running services.
-- `docker compose -f local.yml run --rm django pytest path/to/test_file.py`: Run a single test file or path.
-- `docker logs -f scielo_usage_local_celeryworker`: Monitor real-time task execution.
-
-## Dependencies
-
-- [scielo_log_validator](https://github.com/scieloorg/scielo_log_validator) — log file validation
-- [scielo_usage_counter](https://github.com/scieloorg/scielo_usage_counter) — COUNTER R5.1 metrics extraction
-- [device_detector](https://github.com/thinkwelltwd/device_detector) — client name/version detection
-- [opensearch-py](https://github.com/opensearch-project/opensearch-py) — OpenSearch client
+| `[Reports] Generate Log Report Summary (Manual)` | Manual or scheduled as needed | Sends summary emails using configured collection contacts. |
+| `[Resources] Load Robots Data` | Weekly | Refreshes robots list used during parsing. |
+| `[Resources] Load Geolocation Data` | Monthly | Refreshes GeoIP data used during parsing. |
+
+## Version
+
+The project release version is stored in the `VERSION` file.
diff --git a/VERSION b/VERSION
index 50ffc5a..7ec1d6d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.3
+2.1.0
diff --git a/collection/admin.py b/collection/admin.py
index 8c38f3f..846f6b4 100644
--- a/collection/admin.py
+++ b/collection/admin.py
@@ -1,3 +1 @@
-from django.contrib import admin
-
 # Register your models here.
diff --git a/collection/exceptions.py b/collection/exceptions.py
index 62ce062..e25d988 100644
--- a/collection/exceptions.py
+++ b/collection/exceptions.py
@@ -1,2 +1,2 @@
 class MainCollectionNotFoundError(Exception):
-    ...
\ No newline at end of file
+    ...
diff --git a/collection/models.py b/collection/models.py
index 87da123..b26dd16 100644
--- a/collection/models.py
+++ b/collection/models.py
@@ -11,7 +11,7 @@
 from core.models import CommonControlField, Language, TextWithLang
 from core.utils.request_utils import fetch_data
 
-from . import choices
+from collection import choices
 
 
 class CollectionName(TextWithLang):
@@ -97,7 +97,7 @@ def autocomplete_label(self):
     ]
 
     class Meta:
-        ordering = ['main_name']
+        ordering = ["main_name"]
         verbose_name = _("Collection")
         verbose_name_plural = _("Collections")
         indexes = [
@@ -237,7 +237,7 @@ def name(self):
     @classmethod
     def acron2_list(self):
         return [col.acron2 for col in Collection.objects.iterator()]
-    
+
     @classmethod
     def acron3_list(self):
         return [col.acron3 for col in Collection.objects.iterator()]
diff --git a/collection/tasks.py b/collection/tasks.py
index 221e8bc..303ecf8 100644
--- a/collection/tasks.py
+++ b/collection/tasks.py
@@ -1,13 +1,13 @@
 from django.contrib.auth import get_user_model
 
-from core.utils.request_utils import _get_user
 from collection.models import Collection
 from config import celery_app
+from core.utils.request_utils import _get_user
 
 User = get_user_model()
 
 
-@celery_app.task(bind=True, name='[Collection] Load Collection Data')
+@celery_app.task(bind=True, name="[Collection] Load Collection Data")
 def task_load_collections(self, user_id=None, username=None):
     user = _get_user(self.request, username=username, user_id=user_id)
     Collection.load(user)
diff --git a/collection/tests.py b/collection/tests.py
deleted file mode 100644
index 7ce503c..0000000
--- a/collection/tests.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
diff --git a/collection/views.py b/collection/views.py
index 91ea44a..60f00ef 100644
--- a/collection/views.py
+++ b/collection/views.py
@@ -1,3 +1 @@
-from django.shortcuts import render
-
 # Create your views here.
diff --git a/collection/wagtail_hooks.py b/collection/wagtail_hooks.py
index 018dab8..e556930 100644
--- a/collection/wagtail_hooks.py
+++ b/collection/wagtail_hooks.py
@@ -5,7 +5,8 @@
 from config.menu import get_menu_order
 from document.wagtail_hooks import DocumentSnippetViewSet
 from source.wagtail_hooks import SourceSnippetViewSet
-from .models import Collection
+
+from collection.models import Collection
 
 
 class CollectionSnippetViewSet(SnippetViewSet):
diff --git a/config/collections.py b/config/collections.py
index 9aa3efe..7249f5d 100644
--- a/config/collections.py
+++ b/config/collections.py
@@ -33,31 +33,247 @@
     "xlarge": 0.1,
 }
 
+
+def get_collection_size(collection_acronym):
+    return COLLECTION_ACRON3_SIZE_MAP.get(collection_acronym, "small")
+
+
+def get_collection_parse_queue(collection_acronym):
+    return f"parse_{get_collection_size(collection_acronym)}"
+
 LOG_MANAGER_SEED_DATA = [
-    {"acronym": "arg", "directory_name": "Site clássico", "path": "/app/logs/scielo.ar", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "bol", "directory_name": "Site clássico", "path": "/app/logs/scielo.bo", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "chl", "directory_name": "Site clássico", "path": "/app/logs/scielo.cl", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "col", "directory_name": "Site clássico", "path": "/app/logs/scielo.co", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "cri", "directory_name": "Site clássico", "path": "/app/logs/scielo.cr", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "cub", "directory_name": "Site clássico", "path": "/app/logs/scielo.cu", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "data", "directory_name": "Site clássico", "path": "/app/logs/dataverse", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "dataverse"},
-    {"acronym": "dom", "directory_name": "Site novo", "path": "/app/logs/scielo.dom", "quantity": 1, "start_date": "2026-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "opac"},
-    {"acronym": "ecu", "directory_name": "Site clássico", "path": "/app/logs/scielo.ec", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "esp", "directory_name": "Site clássico", "path": "/app/logs/scielo.es", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "mex", "directory_name": "Site clássico", "path": "/app/logs/scielo.mx", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "per", "directory_name": "Site clássico", "path": "/app/logs/scielo.pe", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "preprints", "directory_name": "Site clássico", "path": "/app/logs/submission-node01", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "preprints"},
-    {"acronym": "prt", "directory_name": "Site clássico", "path": "/app/logs/scielo.pt", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "pry", "directory_name": "Site clássico", "path": "/app/logs/scielo.py", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "psi", "directory_name": "Site clássico", "path": "/app/logs/scielo.pepsic", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "rve", "directory_name": "Site clássico", "path": "/app/logs/scielo.revenf", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "rvt", "directory_name": "Site clássico", "path": "/app/logs/scielo.revtur", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "scl", "directory_name": "Site novo", "path": "/app/logs/scielo.br", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "opac"},
-    {"acronym": "spa", "directory_name": "Site novo - versão prévia", "path": "/app/logs/scielo.sp", "quantity": 2, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "opac_alpha"},
-    {"acronym": "sss", "directory_name": "Site clássico", "path": "/app/logs/scielo.ss", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "sza", "directory_name": "Site clássico", "path": "/app/logs/scielo.za", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "ury", "directory_name": "Site clássico", "path": "/app/logs/scielo.uy", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "ven", "directory_name": "Site clássico", "path": "/app/logs/scielo.ve", "quantity": 1, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "wid", "directory_name": "Site clássico", "path": "/app/logs/scielo.wi", "quantity": 2, "start_date": "2020-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "classic"},
-    {"acronym": "books", "directory_name": "SciELO Books", "path": "/app/logs/books", "quantity": 1, "start_date": "2012-01-01", "e-mail": "tecnologia@scielo.org", "translator_class": "books"},
+    {
+        "acronym": "arg",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.ar",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "bol",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.bo",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "chl",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.cl",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "col",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.co",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "cri",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.cr",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "cub",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.cu",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "data",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/dataverse",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "dataverse",
+    },
+    {
+        "acronym": "dom",
+        "directory_name": "Site novo",
+        "path": "/app/logs/scielo.dom",
+        "quantity": 1,
+        "start_date": "2026-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "opac",
+    },
+    {
+        "acronym": "ecu",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.ec",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "esp",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.es",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "mex",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.mx",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "per",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.pe",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "preprints",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/submission-node01",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "preprints",
+    },
+    {
+        "acronym": "prt",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.pt",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "pry",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.py",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "psi",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.pepsic",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "rve",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.revenf",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "rvt",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.revtur",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "scl",
+        "directory_name": "Site novo",
+        "path": "/app/logs/scielo.br",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "opac",
+    },
+    {
+        "acronym": "spa",
+        "directory_name": "Site novo - versão prévia",
+        "path": "/app/logs/scielo.sp",
+        "quantity": 2,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "opac_alpha",
+    },
+    {
+        "acronym": "sss",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.ss",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "sza",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.za",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "ury",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.uy",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "ven",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.ve",
+        "quantity": 1,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "wid",
+        "directory_name": "Site clássico",
+        "path": "/app/logs/scielo.wi",
+        "quantity": 2,
+        "start_date": "2020-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "classic",
+    },
+    {
+        "acronym": "books",
+        "directory_name": "SciELO Books",
+        "path": "/app/logs/books",
+        "quantity": 1,
+        "start_date": "2012-01-01",
+        "e-mail": "tecnologia@scielo.org",
+        "translator_class": "books",
+    },
 ]
diff --git a/config/menu.py b/config/menu.py
index 844ce0c..f1429e8 100644
--- a/config/menu.py
+++ b/config/menu.py
@@ -7,8 +7,9 @@
     "tasks": 600,
 }
 
+
 def get_menu_order(app_name):
     try:
         return WAGTAIL_MENU_APPS_ORDER[app_name]
-    except:
+    except KeyError:
         return 950
diff --git a/config/settings/base.py b/config/settings/base.py
index 62aa17a..0b67885 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -319,6 +319,20 @@
 CELERY_RESULT_BACKEND = CELERY_BROKER_URL
 # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-accept_content
 CELERY_ACCEPT_CONTENT = ["json"]
+# Import nested task packages that are not exposed through package __init__.py files.
+CELERY_IMPORTS = (
+    "document.tasks.articlemeta",
+    "document.tasks.dataverse",
+    "document.tasks.opac",
+    "document.tasks.pipeline",
+    "document.tasks.preprints",
+    "document.tasks.scielo_books",
+    "metrics.tasks.cleanup",
+    "metrics.tasks.daily_metric_exports",
+    "metrics.tasks.index",
+    "metrics.tasks.log_parsing",
+    "metrics.tasks.resume",
+)
 # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-task_serializer
 CELERY_TASK_SERIALIZER = "json"
 # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-result_serializer
diff --git a/config/urls.py b/config/urls.py
index 73ecd86..91bb5a4 100644
--- a/config/urls.py
+++ b/config/urls.py
@@ -9,7 +9,6 @@
 from wagtail.documents import urls as wagtaildocs_urls
 from wagtailautocomplete.urls.admin import urlpatterns as autocomplete_admin_urls
 
-
 urlpatterns = [
     path("admin/autocomplete/", include(autocomplete_admin_urls)),
     path(settings.DJANGO_ADMIN_URL, admin.site.urls),
diff --git a/core/__init__.py b/core/__init__.py
index e1d8615..e69de29 100644
--- a/core/__init__.py
+++ b/core/__init__.py
@@ -1,7 +0,0 @@
-__version__ = "0.1.0"
-__version_info__ = tuple(
-    [
-        int(num) if num.isdigit() else num
-        for num in __version__.replace("-", ".", 1).split(".")
-    ]
-)
diff --git a/core/collectors/__init__.py b/core/collectors/__init__.py
index 8b13789..e69de29 100644
--- a/core/collectors/__init__.py
+++ b/core/collectors/__init__.py
@@ -1 +0,0 @@
-
diff --git a/core/collectors/articlemeta.py b/core/collectors/articlemeta.py
index 7f6ace0..b544827 100644
--- a/core/collectors/articlemeta.py
+++ b/core/collectors/articlemeta.py
@@ -1,9 +1,9 @@
 import logging
+from time import sleep
 
 import requests
-from django.conf import settings
 from articlemeta.client import RestfulClient, ThriftClient
-from time import sleep
+from django.conf import settings
 
 
 def fetch_article_counter_dict(
diff --git a/core/collectors/opac.py b/core/collectors/opac.py
index 94122b7..5771453 100644
--- a/core/collectors/opac.py
+++ b/core/collectors/opac.py
@@ -1,8 +1,8 @@
 import logging
+from time import sleep
 
 import requests
 from django.conf import settings
-from time import sleep
 
 
 def fetch_counter_dict(from_date, until_date, page=1):
diff --git a/core/collectors/scielo_books.py b/core/collectors/scielo_books.py
index b1f2dd8..87595ba 100644
--- a/core/collectors/scielo_books.py
+++ b/core/collectors/scielo_books.py
@@ -1,10 +1,8 @@
 import logging
+from urllib.parse import urlencode
 
 import requests
 from django.conf import settings
-from urllib.parse import urlencode
-
-
 
 
 def build_url(base_url, params=None):
@@ -33,7 +31,9 @@ def fetch_document(doc_id, base_url=None, db_name=None, headers=None):
         raise ValueError("SCIELO_BOOKS_BASE_URL is not configured")
 
     url = f"{resolved_base_url}/{db_name}/{doc_id}"
-    response = requests.get(url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False)
+    response = requests.get(
+        url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False
+    )
     response.raise_for_status()
     payload = response.json()
     return sanitize_raw_data(payload), url
@@ -62,7 +62,9 @@ def fetch_changes_page(
         params["include_docs"] = "true"
 
     url = build_url(f"{resolved_base_url}/{db_name}/_changes", params)
-    response = requests.get(url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False)
+    response = requests.get(
+        url, headers=headers, timeout=settings.SCIELO_BOOKS_TIMEOUT, verify=False
+    )
     response.raise_for_status()
     payload = response.json()
     return payload if isinstance(payload, dict) else {}
diff --git a/core/home/models.py b/core/home/models.py
index 1734b90..d2ce8dc 100644
--- a/core/home/models.py
+++ b/core/home/models.py
@@ -13,6 +13,7 @@
 class HomePage(Page):
     pass
 
+
 class FormField(AbstractFormField):
     page = ParentalKey("FormPage", on_delete=models.CASCADE, related_name="form_fields")
 
@@ -45,7 +46,10 @@ def serve(self, request, *args, **kwargs):
                     return JsonResponse(
                         {
                             "alert": "error",
-                            "message": "Erro ao tentar enviar a <strong>formulário!</strong> Verifique os campos obrigatórios. Errors: %s"
+                            "message": (
+                                "Erro ao tentar enviar a <strong>formulário!</strong> "
+                                "Verifique os campos obrigatórios. Errors: %s"
+                            )
                             % form.errors,
                         }
                     )
diff --git a/core/models.py b/core/models.py
index 2a4ecbf..346b774 100644
--- a/core/models.py
+++ b/core/models.py
@@ -1,8 +1,8 @@
 import os
 
 from django.contrib.auth import get_user_model
-from django.db import models, IntegrityError
-from django.db.models import Case, When, Value, IntegerField
+from django.db import IntegrityError, models
+from django.db.models import Case, IntegerField, Value, When
 from django.utils.translation import gettext as _
 from wagtail.admin.panels import FieldPanel
 from wagtail.fields import RichTextField
@@ -225,17 +225,20 @@ def get_object_in_preferred_language(self, language):
         mission = self.filter(language=language)
         if mission:
             return mission
-        
-        language_order = ['pt', 'es', 'en']
+
+        language_order = ["pt", "es", "en"]
         langs = self.all().values_list("language", flat=True)
         languages = Language.objects.filter(id__in=langs)
-        
+
         # Define a ordem baseado na lista language_order
-        order = [When(code2=lang, then=Value(i)) for i, lang in enumerate(language_order)]
+        order = [
+            When(code2=lang, then=Value(i)) for i, lang in enumerate(language_order)
+        ]
         ordered_languages = languages.annotate(
-            language_order=Case(*order, default=Value(len(language_order)), output_field=IntegerField())
-        ).order_by('language_order')
-
+            language_order=Case(
+                *order, default=Value(len(language_order)), output_field=IntegerField()
+            )
+        ).order_by("language_order")
 
         for lang in ordered_languages:
             mission = self.filter(language=lang)
@@ -258,7 +261,7 @@ class RichTextWithLanguage(models.Model):
         AutocompletePanel("language"),
         FieldPanel("rich_text"),
     ]
-    
+
     objects = LanguageFallbackManager()
 
     class Meta:
@@ -298,7 +301,7 @@ def autocomplete_label(self):
     ]
 
     class Meta:
-        unique_together = [("license_type", )]
+        unique_together = [("license_type",)]
         verbose_name = _("License")
         verbose_name_plural = _("Licenses")
         indexes = [
@@ -327,9 +330,7 @@ def get(
     ):
         if not license_type:
             raise ValueError("License.get requires license_type parameters")
-        filters = dict(
-            license_type__iexact=license_type
-        )
+        filters = dict(license_type__iexact=license_type)
         try:
             return cls.objects.get(**filters)
         except cls.MultipleObjectsReturned:
@@ -369,7 +370,8 @@ class LicenseStatement(CommonControlField):
         Language, on_delete=models.SET_NULL, null=True, blank=True
     )
     license = models.ForeignKey(
-        License, on_delete=models.SET_NULL, null=True, blank=True)
+        License, on_delete=models.SET_NULL, null=True, blank=True
+    )
 
     panels = [
         FieldPanel("url"),
@@ -407,7 +409,8 @@ def get(
             raise ValueError("LicenseStatement.get requires url or license_p")
         try:
             return cls.objects.get(
-                url__iexact=url, license_p__iexact=license_p, language=language)
+                url__iexact=url, license_p__iexact=license_p, language=language
+            )
         except cls.MultipleObjectsReturned:
             return cls.objects.filter(
                 url__iexact=url, license_p__iexact=license_p, language=language
@@ -448,9 +451,7 @@ def create_or_update(
     ):
         try:
             data = dict(
-                url=url,
-                license_p=license_p,
-                language=language and language.code2
+                url=url, license_p=license_p, language=language and language.code2
             )
             try:
                 obj = cls.get(url, license_p, language)
@@ -465,7 +466,9 @@ def create_or_update(
             except cls.DoesNotExist:
                 return cls.create(user, url, license_p, language, license)
         except Exception as e:
-            raise ValueError(f"Unable to create or update LicenseStatement for {data}: {type(e)} {e}")
+            raise ValueError(
+                f"Unable to create or update LicenseStatement for {data}: {type(e)} {e}"
+            )
 
     @staticmethod
     def parse_url(url):
@@ -514,7 +517,7 @@ class FileWithLang(models.Model):
         blank=True,
         on_delete=models.SET_NULL,
         verbose_name=_("File"),
-        help_text='',
+        help_text="",
         related_name="+",
     )
 
diff --git a/core/tests/tests_collectors.py b/core/tests/tests_collectors.py
index 6d13a7c..f595da3 100644
--- a/core/tests/tests_collectors.py
+++ b/core/tests/tests_collectors.py
@@ -28,14 +28,20 @@ def test_extract_last_seq_accepts_both_couch_formats(self):
 
     @patch("core.collectors.scielo_books.fetch_document")
     @patch("core.collectors.scielo_books.fetch_changes_page")
-    def test_iter_change_documents_uses_docs_from_changes_payload(self, mock_fetch_changes_page, mock_fetch_document):
+    def test_iter_change_documents_uses_docs_from_changes_payload(
+        self, mock_fetch_changes_page, mock_fetch_document
+    ):
         mock_fetch_changes_page.side_effect = [
             {
                 "results": [
                     {
                         "seq": 10,
                         "id": "book1",
-                        "doc": {"_id": "book1", "TYPE": "Monograph", "title": "Book One"},
+                        "doc": {
+                            "_id": "book1",
+                            "TYPE": "Monograph",
+                            "title": "Book One",
+                        },
                     }
                 ],
                 "last_seq": 10,
@@ -43,7 +49,11 @@ def test_iter_change_documents_uses_docs_from_changes_payload(self, mock_fetch_c
             {"results": [], "last_seq": 10},
         ]
 
-        results = list(scielo_books.iter_change_documents(base_url="https://books.example", db_name="scielobooks_1a"))
+        results = list(
+            scielo_books.iter_change_documents(
+                base_url="https://books.example", db_name="scielobooks_1a"
+            )
+        )
 
         self.assertEqual(len(results), 1)
         self.assertEqual(results[0]["payload"]["id"], "book1")
diff --git a/core/tests/tests_date_utils.py b/core/tests/tests_date_utils.py
index 8d4f9b6..9e12869 100644
--- a/core/tests/tests_date_utils.py
+++ b/core/tests/tests_date_utils.py
@@ -10,27 +10,20 @@
 
 
 class DateUtilsTests(TestCase):
-
     def test_get_date_range_with_valid_dates(self):
         from_date = "2023-01-01"
         until_date = "2023-01-31"
         result = get_date_range_str(from_date_str=from_date, until_date_str=until_date)
 
-        expected = (
-           '2023-01-01',
-           '2023-01-31'
-        )
+        expected = ("2023-01-01", "2023-01-31")
         self.assertEqual(result, expected)
 
     def test_get_date_range_with_invalid_from_date(self):
         from_date = "invalid-date"
         until_date = "2023-01-10"
         result = get_date_range_str(from_date_str=from_date, until_date_str=until_date)
-        
-        expected = (
-            '2023-01-03',
-            '2023-01-10'
-        )
+
+        expected = ("2023-01-03", "2023-01-10")
         self.assertEqual(result, expected)
 
     def test_get_date_range_with_invalid_until_date(self):
@@ -38,10 +31,7 @@ def test_get_date_range_with_invalid_until_date(self):
         until_date = "invalid-date"
         result = get_date_range_str(from_date_str=from_date, until_date_str=until_date)
 
-        expected = (
-            '2024-05-20',
-            '2024-05-27'
-        )
+        expected = ("2024-05-20", "2024-05-27")
         self.assertEqual(result, expected)
 
     def test_get_date_range_with_days_to_go_back(self):
@@ -51,7 +41,7 @@ def test_get_date_range_with_days_to_go_back(self):
 
         expected = (
             (today - timedelta(days=days_to_go_back)).strftime("%Y-%m-%d"),
-            today.strftime("%Y-%m-%d")
+            today.strftime("%Y-%m-%d"),
         )
         self.assertEqual(result, expected)
 
@@ -59,7 +49,7 @@ def test_get_date_range_with_no_params(self):
         result = get_date_range_str()
         expected = (
             (datetime.now().date() - timedelta(days=7)).strftime("%Y-%m-%d"),
-            datetime.now().date().strftime("%Y-%m-%d")
+            datetime.now().date().strftime("%Y-%m-%d"),
         )
         self.assertEqual(result, expected)
 
@@ -67,8 +57,8 @@ def test_get_date_range_with_only_from_date(self):
         from_date = "2025-02-01"
         result = get_date_range_str(from_date_str=from_date)
         expected = (
-            '2025-02-01',
-            '2025-02-08',
+            "2025-02-01",
+            "2025-02-08",
         )
         self.assertEqual(result, expected)
 
@@ -76,15 +66,15 @@ def test_get_date_range_with_only_until_date(self):
         until_date = "2025-02-08"
         result = get_date_range_str(until_date_str=until_date)
         expected = (
-            '2025-02-01',
-            '2025-02-08',
+            "2025-02-01",
+            "2025-02-08",
         )
         self.assertEqual(result, expected)
 
     def test_extract_minute_second_key(self):
         dt = datetime(2023, 3, 15, 14, 30, 45)
         key = extract_minute_second_key(dt)
-        self.assertEqual(key, '30:45')
+        self.assertEqual(key, "30:45")
 
     def test_extract_minute_second_key_returns_none_for_invalid_datetime(self):
         self.assertIsNone(extract_minute_second_key(None))
diff --git a/core/users/admin.py b/core/users/admin.py
index 48460ed..02d6a9d 100644
--- a/core/users/admin.py
+++ b/core/users/admin.py
@@ -3,7 +3,6 @@
 from django.contrib.auth import get_user_model
 from django.utils.translation import gettext_lazy as _
 
-
 User = get_user_model()
 
 
diff --git a/core/users/forms.py b/core/users/forms.py
index 14faa58..6e1dd9d 100644
--- a/core/users/forms.py
+++ b/core/users/forms.py
@@ -4,7 +4,6 @@
 from django.contrib.auth import get_user_model
 from django.utils.translation import gettext_lazy as _
 
-
 User = get_user_model()
 
 
diff --git a/core/users/models.py b/core/users/models.py
index 4d894f2..6a3e360 100644
--- a/core/users/models.py
+++ b/core/users/models.py
@@ -15,7 +15,7 @@ class User(AbstractUser):
     name = models.CharField(_("Name of User"), blank=True, max_length=255)
     first_name = models.CharField(max_length=150, blank=True, verbose_name="first name")
     last_name = models.CharField(max_length=150, blank=True, verbose_name="last name")
-    
+
     def get_absolute_url(self):
         """Get url for user's detail view.
 
diff --git a/core/users/tasks.py b/core/users/tasks.py
index 7ee093a..39511e8 100644
--- a/core/users/tasks.py
+++ b/core/users/tasks.py
@@ -5,7 +5,7 @@
 User = get_user_model()
 
 
-@celery_app.task(bind=True, name='Get users count')
+@celery_app.task(bind=True, name="Get users count")
 def get_users_count(self):
     """A pointless Celery task to demonstrate usage."""
     return User.objects.count()
diff --git a/core/users/tests/test_urls.py b/core/users/tests/test_urls.py
index c393ced..3dcbdf2 100644
--- a/core/users/tests/test_urls.py
+++ b/core/users/tests/test_urls.py
@@ -1,5 +1,4 @@
 import pytest
-
 from django.urls import resolve, reverse
 
 from core.users.models import User
diff --git a/core/users/views.py b/core/users/views.py
index 42d187f..488c294 100644
--- a/core/users/views.py
+++ b/core/users/views.py
@@ -5,7 +5,6 @@
 from django.utils.translation import gettext_lazy as _
 from django.views.generic import DetailView, RedirectView, UpdateView
 
-
 User = get_user_model()
 
 
diff --git a/core/utils/csv_utils.py b/core/utils/csv_utils.py
index 23d3949..3fa2da7 100644
--- a/core/utils/csv_utils.py
+++ b/core/utils/csv_utils.py
@@ -13,14 +13,16 @@ def get_load_data_function(file_path):
     Returns:
     function: The corresponding function to load data from the file.
     """
-    if file_path.lower().endswith('.csv'):
+    if file_path.lower().endswith(".csv"):
         return load_csv
-    
-    if file_path.lower().endswith('.tar.gz') or ('.tar' in file_path.lower() and file_path.lower().endswith('.gz')):
+
+    if file_path.lower().endswith(".tar.gz") or (
+        ".tar" in file_path.lower() and file_path.lower().endswith(".gz")
+    ):
         return load_tar_gz
 
 
-def load_csv(file_obj, delimiter='\t', is_stream=False):
+def load_csv(file_obj, delimiter="\t", is_stream=False):
     """
     Loads and processes a CSV file, yielding each row as a dictionary.
 
@@ -33,16 +35,16 @@ def load_csv(file_obj, delimiter='\t', is_stream=False):
     dict: Each row of the CSV file as a dictionary.
     """
     if is_stream:
-        file_obj = io.StringIO(file_obj.decode('utf-8'))
+        file_obj = io.StringIO(file_obj.decode("utf-8"))
 
     with file_obj if is_stream else open(file_obj) as fin:
         first_line = fin.readline().strip()
         if not first_line:
             return
-        
+
         header = first_line.split(delimiter)
         reader = csv.DictReader(
-            fin, 
+            fin,
             fieldnames=header,
             delimiter=delimiter,
         )
@@ -50,7 +52,7 @@ def load_csv(file_obj, delimiter='\t', is_stream=False):
             yield row
 
 
-def load_tar_gz(file_path, delimiter='\t'):
+def load_tar_gz(file_path, delimiter="\t"):
     """
     Loads and processes CSV files from within a tar.gz archive, yielding each row as a dictionary.
 
@@ -61,12 +63,8 @@ def load_tar_gz(file_path, delimiter='\t'):
     Yields:
     dict: Each row of each CSV file within the tar.gz archive as a dictionary.
     """
-    with tarfile.open(file_path, 'r:gz') as tar:
+    with tarfile.open(file_path, "r:gz") as tar:
         for member in tar.getmembers():
-            if member.isfile() and member.name.lower().endswith('.csv'):
+            if member.isfile() and member.name.lower().endswith(".csv"):
                 file_content = tar.extractfile(member).read()
-                yield from load_csv(
-                    file_content, 
-                    delimiter=delimiter, 
-                    is_stream=True
-                )
+                yield from load_csv(file_content, delimiter=delimiter, is_stream=True)
diff --git a/core/utils/date_utils.py b/core/utils/date_utils.py
index f20ffea..4f3df0e 100644
--- a/core/utils/date_utils.py
+++ b/core/utils/date_utils.py
@@ -1,5 +1,4 @@
 import logging
-
 from datetime import datetime, timedelta
 
 
@@ -32,12 +31,17 @@ def get_date_obj(date_str: str, format: str = "%Y-%m-%d") -> datetime.date:
         return None
 
 
-def get_date_range_str(from_date_str: str = None, until_date_str: str = None, days_to_go_back: int = None) -> tuple[str, str]:
+def get_date_range_str(
+    from_date_str: str = None,
+    until_date_str: str = None,
+    days_to_go_back: int = None,
+) -> tuple[str, str]:
     """
     Get the date range to be used in the queries.
 
     If both from_date_str and until_date_str are provided, they will be used.
-    If only one is provided, it will be used as the start or end date, and the other will be calculated based on a 7-day range.
+    If only one is provided, it will be used as the start or end date,
+    and the other will be calculated based on a 7-day range.
     If neither is provided, the function will default to the last 7 days from today.
     If days_to_go_back is provided, it will override the from_date_str and until_date_str.
 
@@ -52,7 +56,9 @@ def get_date_range_str(from_date_str: str = None, until_date_str: str = None, da
     today = datetime.now().date()
 
     if days_to_go_back:
-        return get_date_str(today - timedelta(days=days_to_go_back)), get_date_str(today)
+        return get_date_str(today - timedelta(days=days_to_go_back)), get_date_str(
+            today
+        )
 
     from_date_obj = get_date_obj(from_date_str)
     until_date_obj = get_date_obj(until_date_str)
@@ -65,7 +71,7 @@ def get_date_range_str(from_date_str: str = None, until_date_str: str = None, da
 
     if until_date_obj:
         return get_date_str(until_date_obj - timedelta(days=7)), until_date_str
-    
+
     return get_date_str(today - timedelta(days=7)), get_date_str(today)
 
 
@@ -73,12 +79,12 @@ def get_date_obj_from_timestamp(timestamp):
     return datetime.fromtimestamp(timestamp).date()
 
 
-def get_date_objs_from_date_range(from_date, until_date, format='%Y-%m-%d'):
+def get_date_objs_from_date_range(from_date, until_date, format="%Y-%m-%d"):
     visible_dates = []
 
     if not isinstance(from_date, datetime):
         from_date = datetime.strptime(from_date, format).date()
-        
+
     if not isinstance(until_date, datetime):
         until_date = datetime.strptime(until_date, format).date()
 
@@ -131,7 +137,9 @@ def _coerce_datetime(dt):
         try:
             return datetime.strptime(dt, "%Y-%m-%d %H:%M:%S")
         except ValueError:
-            logging.error("Invalid datetime string format. Expected '%Y-%m-%d %H:%M:%S'.")
+            logging.error(
+                "Invalid datetime string format. Expected '%Y-%m-%d %H:%M:%S'."
+            )
             return None
 
     logging.error("Invalid datetime value: %r.", dt)
diff --git a/core/utils/metadata.py b/core/utils/metadata.py
new file mode 100644
index 0000000..01e78a5
--- /dev/null
+++ b/core/utils/metadata.py
@@ -0,0 +1,43 @@
+def as_list(value):
+    """Coerce ``value`` to a list.
+
+    Falsy input gives ``[]``, a list is returned as-is, anything else is wrapped.
+    """
+    if not value:
+        return []
+    return value if isinstance(value, list) else [value]
+
+
+def compact_dict(data):
+    """Return a copy of ``data`` without None, "", [], {} or () values."""
+    empty_values = (None, "", [], {}, ())
+    return {k: v for k, v in data.items() if v not in empty_values}
+
+
+def get_value(data, key, default=None):
+    """Read ``key`` from a dict, or as an attribute of any other object."""
+    is_mapping = isinstance(data, dict)
+    return data.get(key, default) if is_mapping else getattr(data, key, default)
+
+
+def normalize_langs(value):
+    """Normalize a language payload into a list.
+
+    Lists drop None/"" items, dicts keep the keys with truthy values, any
+    other truthy value is wrapped in a list, and falsy input gives ``[]``.
+    """
+    if isinstance(value, list):
+        return [lang for lang in value if lang not in (None, "")]
+    if isinstance(value, dict):
+        return [lang for lang, enabled in value.items() if enabled]
+    return [value] if value else []
+
+
+def normalize_year(value, fallback_date=None):
+    """Return the first four characters of the first usable argument.
+
+    ``value`` wins over ``fallback_date``; None and "" count as missing,
+    and None is returned when both are missing.
+    """
+    usable = (x for x in (value, fallback_date) if x not in (None, ""))
+    return next((str(x)[:4] for x in usable), None)
diff --git a/core/utils/request_utils.py b/core/utils/request_utils.py
index c4fbec6..084cd46 100644
--- a/core/utils/request_utils.py
+++ b/core/utils/request_utils.py
@@ -1,15 +1,13 @@
 import logging
 
 import requests
+from django.contrib.auth import get_user_model
 from tenacity import (
     retry,
     retry_if_exception_type,
     stop_after_attempt,
     wait_exponential,
 )
-from urllib3.util import Retry
-from django.contrib.auth import get_user_model
-
 
 logger = logging.getLogger(__name__)
 User = get_user_model()
diff --git a/core/utils/standardizer.py b/core/utils/standardizer.py
index c228bf5..bcd0cb2 100644
--- a/core/utils/standardizer.py
+++ b/core/utils/standardizer.py
@@ -46,7 +46,7 @@ def standardize_doi(text):
     ]
     for prefix in doi_prefixes:
         if text.lower().startswith(prefix):
-            text = text[len(prefix):]
+            text = text[len(prefix) :]
             break
 
     if text.lower().startswith("10."):
@@ -75,3 +75,25 @@ def language_iso(code):
     if langcodes.tag_is_valid(code):
         return langcodes.standardize_tag(code)
     return ""
+
+
+def standardize_or_default(func, value, default=""):
+    """Return ``func(value)``, or ``default`` when the call raises Exception."""
+    try:
+        result = func(value)
+    except Exception:
+        return default
+    return result
+
+
+def standardize_pid_generic_values(values):
+    """Return unique, non-empty ``standardize_pid_generic`` results in iteration order."""
+    if not isinstance(values, (list, tuple, set)):
+        return []  # only list/tuple/set input is processed
+
+    unique = []
+    for raw in values:
+        pid = standardize_or_default(standardize_pid_generic, raw)
+        if pid and pid not in unique:
+            unique.append(pid)
+    return unique
diff --git a/core/wagtail_hooks.py b/core/wagtail_hooks.py
index e7da1eb..0604472 100644
--- a/core/wagtail_hooks.py
+++ b/core/wagtail_hooks.py
@@ -4,7 +4,6 @@
 from django.utils.html import format_html
 from wagtail import hooks
 
-
 HIDDEN_MAIN_MENU_ITEMS = {
     "documents",
     "explorer",
diff --git a/core_settings/admin.py b/core_settings/admin.py
index 8c38f3f..846f6b4 100644
--- a/core_settings/admin.py
+++ b/core_settings/admin.py
@@ -1,3 +1 @@
-from django.contrib import admin
-
 # Register your models here.
diff --git a/core_settings/tests.py b/core_settings/tests.py
index 7ce503c..a39b155 100644
--- a/core_settings/tests.py
+++ b/core_settings/tests.py
@@ -1,3 +1 @@
-from django.test import TestCase
-
 # Create your tests here.
diff --git a/core_settings/views.py b/core_settings/views.py
index 91ea44a..60f00ef 100644
--- a/core_settings/views.py
+++ b/core_settings/views.py
@@ -1,3 +1 @@
-from django.shortcuts import render
-
 # Create your views here.
diff --git a/django_celery_beat/models.py b/django_celery_beat/models.py
index 466c16e..583a9aa 100644
--- a/django_celery_beat/models.py
+++ b/django_celery_beat/models.py
@@ -73,14 +73,15 @@ def crontab_schedule_celery_timezone():
     except AttributeError:
         return "UTC"
 
-    # evita `AttributeError: type object 'TimeZoneField' has no attribute 'default_choices'`
-    return "UTC"
-    return (
-        CELERY_TIMEZONE
-        if CELERY_TIMEZONE
-        in [choice[0].zone for choice in timezone_field.TimeZoneField.default_choices]
-        else "UTC"
-    )
+    if not CELERY_TIMEZONE:
+        return "UTC"
+
+    try:
+        timezone = timezone_field.TimeZoneField().to_python(CELERY_TIMEZONE)
+    except ValidationError:
+        return "UTC"
+
+    return str(timezone)
 
 
 class SolarSchedule(models.Model):
diff --git a/django_celery_beat/wagtail_hooks.py b/django_celery_beat/wagtail_hooks.py
index 492a642..e8c1994 100644
--- a/django_celery_beat/wagtail_hooks.py
+++ b/django_celery_beat/wagtail_hooks.py
@@ -7,12 +7,9 @@
 from django.utils.translation import gettext_lazy as _
 from kombu.utils.json import loads
 from wagtail import hooks
-from wagtail_modeladmin.options import (
-    ModelAdmin,
-    ModelAdminGroup,
-    modeladmin_register,
-)
+from wagtail_modeladmin.options import ModelAdmin, ModelAdminGroup, modeladmin_register
 
+from config.menu import get_menu_order
 from django_celery_beat.models import (
     ClockedSchedule,
     CrontabSchedule,
@@ -23,8 +20,6 @@
 )
 from django_celery_beat.utils import is_database_scheduler
 
-from config.menu import get_menu_order
-
 from .button_helper import PeriodicTaskHelper
 
 
diff --git a/document/__init__.py b/document/__init__.py
index 8b13789..e69de29 100644
--- a/document/__init__.py
+++ b/document/__init__.py
@@ -1 +0,0 @@
-
diff --git a/document/management/__init__.py b/document/management/__init__.py
index 8b13789..e69de29 100644
--- a/document/management/__init__.py
+++ b/document/management/__init__.py
@@ -1 +0,0 @@
-
diff --git a/document/management/commands/__init__.py b/document/management/commands/__init__.py
index 8b13789..e69de29 100644
--- a/document/management/commands/__init__.py
+++ b/document/management/commands/__init__.py
@@ -1 +0,0 @@
-
diff --git a/document/management/commands/load_articles_by_year.py b/document/management/commands/load_articles_by_year.py
index a922456..4b7e078 100644
--- a/document/management/commands/load_articles_by_year.py
+++ b/document/management/commands/load_articles_by_year.py
@@ -1,7 +1,7 @@
 from django.core.management.base import BaseCommand
 
-from document.tasks import task_load_documents_from_article_meta
-from document.tasks import task_load_documents_from_opac
+from document.tasks.articlemeta import task_load_documents_from_article_meta
+from document.tasks.opac import task_load_documents_from_opac
 
 
 class Command(BaseCommand):
diff --git a/document/migrations/__init__.py b/document/migrations/__init__.py
index 8b13789..e69de29 100644
--- a/document/migrations/__init__.py
+++ b/document/migrations/__init__.py
@@ -1 +0,0 @@
-
diff --git a/document/models.py b/document/models.py
index 5197692..d78968b 100644
--- a/document/models.py
+++ b/document/models.py
@@ -174,6 +174,52 @@ class Document(CommonControlField):
     def __str__(self):
         return f"{self.collection.acron3} - {self.document_type} - {self.document_id}"
 
+    @classmethod
+    def build_book_pid_generic(cls, book_id):
+        """Return ``book:<book_id>``, or None when the id is None or ""."""
+        has_id = book_id not in (None, "")
+        return f"book:{book_id}" if has_id else None
+
+    @classmethod
+    def build_chapter_pid_generic(cls, book_id, chapter_id):
+        """Return ``book:<id>/chapter:<id>``, or None if either id is None/""."""
+        complete = all(part not in (None, "") for part in (book_id, chapter_id))
+        return f"book:{book_id}/chapter:{chapter_id}" if complete else None
+
+    @classmethod
+    def find_by_identifiers(cls, collection, document_type, *identifiers):
+        """Return the first document of this collection/type matching an identifier.
+
+        Fields are tried in the order document_id, pid_v2, pid_v3, pid_generic,
+        each against every identifier that is not None or ""; None if no match.
+        """
+        candidates = [str(item) for item in identifiers if item not in (None, "")]
+        if not candidates:
+            return None
+
+        scoped = cls.objects.filter(collection=collection, document_type=document_type)
+        for field_name in ("document_id", "pid_v2", "pid_v3", "pid_generic"):
+            for candidate in candidates:
+                found = scoped.filter(**{field_name: candidate}).first()
+                if found:
+                    return found
+        return None
+
+    @classmethod
+    def book_exists_for_raw_id(cls, collection, raw_id):
+        """Tell whether ``collection`` has a book whose extra_data raw_id matches."""
+        books = cls.objects.filter(
+            collection=collection, document_type=cls.DOCUMENT_TYPE_BOOK
+        )
+        return books.filter(extra_data__raw_id=str(raw_id)).exists()
+
+    @classmethod
+    def delete_documents_by_raw_id(cls, collection, raw_id):
+        """Delete all ``collection`` documents whose extra_data raw_id matches."""
+        lookup = {"collection": collection, "extra_data__raw_id": str(raw_id)}
+        matching = cls.objects.filter(**lookup)
+        return matching.delete()
+
     @classmethod
     def metadata(cls, collection=None):
         queryset = cls.objects.select_related("collection", "source").only(
@@ -215,7 +261,9 @@ def metadata(cls, collection=None):
                 "files": document.files or {},
                 "identifiers": document.identifiers or {},
                 "parent_document_id": (
-                    document.parent_document.document_id if document.parent_document else None
+                    document.parent_document.document_id
+                    if document.parent_document
+                    else None
                 ),
                 "pid_generic": document.pid_generic,
                 "pid_v2": document.pid_v2,
@@ -223,7 +271,8 @@ def metadata(cls, collection=None):
                 "processing_date": document.processing_date,
                 "publication_date": document.publication_date,
                 "publication_year": document.publication_year,
-                "scielo_issn": document.scielo_issn or (source.scielo_issn if source else None),
+                "scielo_issn": document.scielo_issn
+                or (source.scielo_issn if source else None),
                 "source_id": source.source_id if source else None,
                 "source_type": source.source_type if source else None,
                 "text_langs": document.text_langs or [],
diff --git a/document/services/__init__.py b/document/services/__init__.py
index 8b13789..e69de29 100644
--- a/document/services/__init__.py
+++ b/document/services/__init__.py
@@ -1 +0,0 @@
-
diff --git a/document/services/articles.py b/document/services/article.py
similarity index 82%
rename from document/services/articles.py
rename to document/services/article.py
index 09244b3..c6f6d42 100644
--- a/document/services/articles.py
+++ b/document/services/article.py
@@ -1,6 +1,6 @@
 from document.models import Document
 
-from .common import build_document_id, compact_dict, get_existing_document, normalize_langs, normalize_year
+from core.utils.metadata import compact_dict, normalize_langs, normalize_year
 
 
 def upsert_article_document_from_articlemeta(
@@ -11,11 +11,13 @@ def upsert_article_document_from_articlemeta(
     force_update=True,
 ):
     pid_v2 = payload.get("code")
-    document_id = build_document_id(pid_v2, payload.get("pid_v3"), payload.get("pid_generic"))
+    document_id = _first_identifier(
+        pid_v2, payload.get("pid_v3"), payload.get("pid_generic")
+    )
     if not document_id:
         return None
 
-    document = get_existing_document(
+    document = Document.find_by_identifiers(
         collection,
         Document.DOCUMENT_TYPE_ARTICLE,
         document_id,
@@ -47,8 +49,12 @@ def upsert_article_document_from_articlemeta(
         document.default_lang = payload.get("default_language") or document.default_lang
         document.text_langs = normalize_langs(payload.get("text_langs"))
         document.default_media_format = document.default_media_format
-        document.processing_date = payload.get("processing_date") or document.processing_date
-        document.publication_date = payload.get("publication_date") or document.publication_date
+        document.processing_date = (
+            payload.get("processing_date") or document.processing_date
+        )
+        document.publication_date = (
+            payload.get("publication_date") or document.publication_date
+        )
         document.publication_year = normalize_year(
             payload.get("publication_year"),
             fallback_date=document.publication_date,
@@ -79,11 +85,15 @@ def upsert_article_document_from_opac(
 ):
     pid_v2 = payload.get("pid_v2")
     pid_v3 = payload.get("pid_v3")
-    document_id = build_document_id(pid_v2, pid_v3, payload.get("pid_generic"))
+    document_id = _first_identifier(
+        pid_v2,
+        pid_v3,
+        payload.get("pid_generic"),
+    )
     if not document_id:
         return None
 
-    document = get_existing_document(
+    document = Document.find_by_identifiers(
         collection,
         Document.DOCUMENT_TYPE_ARTICLE,
         document_id,
@@ -115,10 +125,14 @@ def upsert_article_document_from_opac(
         )
         document.files = document.files or {}
         document.default_lang = payload.get("default_language") or document.default_lang
-        document.text_langs = normalize_langs(payload.get("text_langs")) or document.text_langs or []
+        document.text_langs = (
+            normalize_langs(payload.get("text_langs")) or document.text_langs or []
+        )
         document.default_media_format = document.default_media_format
         document.processing_date = document.processing_date
-        document.publication_date = payload.get("publication_date") or document.publication_date
+        document.publication_date = (
+            payload.get("publication_date") or document.publication_date
+        )
         document.publication_year = normalize_year(
             payload.get("publication_year"),
             fallback_date=document.publication_date,
@@ -164,3 +178,10 @@ def _merge_dicts(current, new_values):
     merged = dict(current or {})
     merged.update(new_values or {})
     return merged
+
+
+def _first_identifier(*values):
+    """Return the first value that is not None or "" as ``str``, else None."""
+    # Lazy generator: ``str()`` is only applied to the value actually returned.
+    present = (str(item) for item in values if item not in (None, ""))
+    return next(present, None)
diff --git a/document/services/books.py b/document/services/book.py
similarity index 71%
rename from document/services/books.py
rename to document/services/book.py
index 96d92e1..3b5a86a 100644
--- a/document/services/books.py
+++ b/document/services/book.py
@@ -1,16 +1,6 @@
 from document.models import Document
 
-
-def build_book_pid_generic(book_id):
-    if book_id in (None, ""):
-        return None
-    return f"book:{book_id}"
-
-
-def build_chapter_pid_generic(book_id, chapter_id):
-    if book_id in (None, "") or chapter_id in (None, ""):
-        return None
-    return f"book:{book_id}/chapter:{chapter_id}"
+from core.utils.metadata import compact_dict, normalize_langs, normalize_year
 
 
 def enrich_part_payload(payload, monograph_payload):
@@ -43,7 +33,7 @@ def upsert_monograph_document(
         return None
 
     book_id = str(payload.get("id"))
-    pid_generic = build_book_pid_generic(book_id)
+    pid_generic = Document.build_book_pid_generic(book_id)
     document, created = Document.objects.get_or_create(
         collection=collection,
         document_type=Document.DOCUMENT_TYPE_BOOK,
@@ -64,11 +54,11 @@ def upsert_monograph_document(
         document.identifiers = _build_monograph_identifiers(payload)
         document.files = {}
         document.default_lang = payload.get("language") or None
-        document.text_langs = _unique_list(payload.get("language"))
+        document.text_langs = normalize_langs(payload.get("language"))
         document.default_media_format = None
         document.processing_date = None
         document.publication_date = payload.get("publication_date") or None
-        document.publication_year = _normalize_year(payload.get("year"))
+        document.publication_year = normalize_year(payload.get("year"))
         document.extra_data = _build_monograph_extra_data(
             payload,
             source_url=source_url,
@@ -97,7 +87,7 @@ def upsert_part_document(
 
     book_id = payload.get("monograph")
     chapter_id = payload.get("id")
-    pid_generic = build_chapter_pid_generic(book_id, chapter_id)
+    pid_generic = Document.build_chapter_pid_generic(book_id, chapter_id)
     document, created = Document.objects.get_or_create(
         collection=collection,
         document_type=Document.DOCUMENT_TYPE_CHAPTER,
@@ -118,17 +108,15 @@ def upsert_part_document(
         document.identifiers = _build_part_identifiers(payload)
         document.files = {}
         document.default_lang = (
-            payload.get("text_language")
-            or payload.get("monograph_language")
-            or None
+            payload.get("text_language") or payload.get("monograph_language") or None
         )
-        document.text_langs = _unique_list(
+        document.text_langs = normalize_langs(
             payload.get("text_language") or payload.get("monograph_language")
         )
         document.default_media_format = None
         document.processing_date = None
         document.publication_date = payload.get("monograph_publication_date") or None
-        document.publication_year = _normalize_year(payload.get("monograph_year"))
+        document.publication_year = normalize_year(payload.get("monograph_year"))
         document.extra_data = _build_part_extra_data(
             payload,
             source_url=source_url,
@@ -142,37 +130,6 @@ def upsert_part_document(
     return document
 
 
-def delete_book_document(collection, book_id):
-    return Document.objects.filter(
-        collection=collection,
-        document_type=Document.DOCUMENT_TYPE_BOOK,
-        document_id=build_book_pid_generic(book_id),
-    ).delete()
-
-
-def delete_document_by_raw_id(collection, raw_id):
-    return Document.objects.filter(
-        collection=collection,
-        extra_data__raw_id=str(raw_id),
-    ).delete()
-
-
-def has_monograph_document_for_raw_id(collection, raw_id):
-    return Document.objects.filter(
-        collection=collection,
-        document_type=Document.DOCUMENT_TYPE_BOOK,
-        extra_data__raw_id=str(raw_id),
-    ).exists()
-
-
-def get_monograph_document(collection, book_id):
-    return Document.objects.filter(
-        collection=collection,
-        document_type=Document.DOCUMENT_TYPE_BOOK,
-        document_id=build_book_pid_generic(book_id),
-    ).first()
-
-
 def _build_monograph_identifiers(payload):
     identifiers = {
         "book_id": str(payload.get("id")) if payload.get("id") is not None else None,
@@ -180,19 +137,21 @@ def _build_monograph_identifiers(payload):
         "eisbn": payload.get("eisbn"),
         "doi": payload.get("doi_number"),
     }
-    return _compact_dict(identifiers)
+    return compact_dict(identifiers)
 
 
 def _build_part_identifiers(payload):
     identifiers = {
-        "book_id": str(payload.get("monograph")) if payload.get("monograph") is not None else None,
+        "book_id": str(payload.get("monograph"))
+        if payload.get("monograph") is not None
+        else None,
         "chapter_id": str(payload.get("id")) if payload.get("id") is not None else None,
         "isbn": payload.get("monograph_isbn"),
         "eisbn": payload.get("monograph_eisbn"),
         "doi": payload.get("doi_number"),
         "book_doi": payload.get("monograph_doi_number"),
     }
-    return _compact_dict(identifiers)
+    return compact_dict(identifiers)
 
 
 def _build_monograph_extra_data(payload, source_url=None, last_seq=None):
@@ -211,7 +170,7 @@ def _build_monograph_extra_data(payload, source_url=None, last_seq=None):
         "translated_synopses": payload.get("translated_synopses"),
         "synopsis": payload.get("synopsis"),
     }
-    return _compact_dict(extra_data)
+    return compact_dict(extra_data)
 
 
 def _build_part_extra_data(payload, source_url=None, last_seq=None):
@@ -225,7 +184,9 @@ def _build_part_extra_data(payload, source_url=None, last_seq=None):
         "pages": payload.get("pages"),
         "creators": payload.get("creators"),
         "translated_titles": payload.get("translated_titles"),
-        "monograph_id": str(payload.get("monograph")) if payload.get("monograph") is not None else None,
+        "monograph_id": str(payload.get("monograph"))
+        if payload.get("monograph") is not None
+        else None,
         "monograph_title": payload.get("monograph_title"),
         "monograph_language": payload.get("monograph_language"),
         "monograph_publication_date": payload.get("monograph_publication_date"),
@@ -233,24 +194,4 @@ def _build_part_extra_data(payload, source_url=None, last_seq=None):
         "monograph_publisher": payload.get("monograph_publisher"),
         "monograph_creators": payload.get("monograph_creators"),
     }
-    return _compact_dict(extra_data)
-
-
-def _unique_list(value):
-    if not value:
-        return []
-    return [value]
-
-
-def _normalize_year(value):
-    if value in (None, ""):
-        return None
-    return str(value)[:4]
-
-
-def _compact_dict(data):
-    return {
-        key: value
-        for key, value in data.items()
-        if value not in (None, "", [], {}, ())
-    }
+    return compact_dict(extra_data)
diff --git a/document/services/common.py b/document/services/common.py
deleted file mode 100644
index 91e103d..0000000
--- a/document/services/common.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from document.models import Document
-
-
-def build_document_id(*values):
-    for value in values:
-        if value not in (None, ""):
-            return str(value)
-    return None
-
-
-def get_existing_document(collection, document_type, *identifiers):
-    identifiers = [str(value) for value in identifiers if value not in (None, "")]
-    if not identifiers:
-        return None
-
-    queryset = Document.objects.filter(
-        collection=collection,
-        document_type=document_type,
-    )
-
-    for field_name in ("document_id", "pid_v2", "pid_v3", "pid_generic"):
-        for identifier in identifiers:
-            document = queryset.filter(**{field_name: identifier}).first()
-            if document:
-                return document
-
-    return None
-
-
-def normalize_langs(value):
-    if not value:
-        return []
-
-    if isinstance(value, list):
-        return [item for item in value if item not in (None, "")]
-
-    if isinstance(value, dict):
-        return [key for key, enabled in value.items() if enabled]
-
-    return [value]
-
-
-def normalize_year(value, fallback_date=None):
-    if value not in (None, ""):
-        return str(value)[:4]
-
-    if fallback_date not in (None, ""):
-        return str(fallback_date)[:4]
-
-    return None
-
-
-def compact_dict(data):
-    return {
-        key: value
-        for key, value in data.items()
-        if value not in (None, "", [], {}, ())
-    }
diff --git a/document/services/datasets.py b/document/services/dataset.py
similarity index 91%
rename from document/services/datasets.py
rename to document/services/dataset.py
index 2496b20..c6f5bb5 100644
--- a/document/services/datasets.py
+++ b/document/services/dataset.py
@@ -1,6 +1,6 @@
 from document.models import Document
 
-from .common import compact_dict, normalize_year
+from core.utils.metadata import compact_dict, normalize_year
 
 
 def upsert_dataset_document(
@@ -51,7 +51,9 @@ def upsert_dataset_document(
         document.text_langs = document.text_langs or []
         document.default_media_format = document.default_media_format
         document.processing_date = document.processing_date
-        document.publication_date = payload.get("dataset_published") or document.publication_date
+        document.publication_date = (
+            payload.get("dataset_published") or document.publication_date
+        )
         document.publication_year = normalize_year(
             None,
             fallback_date=document.publication_date,
diff --git a/document/services/preprints.py b/document/services/preprint.py
similarity index 89%
rename from document/services/preprints.py
rename to document/services/preprint.py
index 4be89f1..cfcca48 100644
--- a/document/services/preprints.py
+++ b/document/services/preprint.py
@@ -1,6 +1,6 @@
 from document.models import Document
 
-from .common import compact_dict, normalize_langs, normalize_year
+from core.utils.metadata import compact_dict, normalize_langs, normalize_year
 
 
 def upsert_preprint_document(
@@ -40,7 +40,9 @@ def upsert_preprint_document(
         document.text_langs = normalize_langs(payload.get("text_langs"))
         document.default_media_format = document.default_media_format
         document.processing_date = document.processing_date
-        document.publication_date = payload.get("publication_date") or document.publication_date
+        document.publication_date = (
+            payload.get("publication_date") or document.publication_date
+        )
         document.publication_year = normalize_year(
             payload.get("publication_year"),
             fallback_date=document.publication_date,
diff --git a/document/tasks/__init__.py b/document/tasks/__init__.py
index 95a0ba5..e69de29 100644
--- a/document/tasks/__init__.py
+++ b/document/tasks/__init__.py
@@ -1,28 +0,0 @@
-from .articlemeta import (
-    load_documents_from_article_meta,
-    task_load_documents_from_article_meta,
-)
-from .common import (
-    get_latest_scielo_books_last_seq,
-)
-from .dataverse import (
-    load_dataset_metadata_from_dataverse,
-    task_load_dataset_metadata_into_documents,
-)
-from .opac import (
-    load_documents_from_opac,
-    task_load_documents_from_opac,
-)
-from .pipeline import (
-    task_daily_metadata_sync_pipeline,
-)
-from .preprints import (
-    load_preprints_from_preprints_api,
-    task_load_preprints_into_documents,
-)
-from .scielo_books import (
-    load_documents_from_scielo_books,
-    sync_documents_from_scielo_books,
-    task_load_documents_from_scielo_books,
-    task_sync_documents_from_scielo_books,
-)
diff --git a/document/tasks/articlemeta.py b/document/tasks/articlemeta.py
index 75b2689..6fbd0b4 100644
--- a/document/tasks/articlemeta.py
+++ b/document/tasks/articlemeta.py
@@ -3,15 +3,14 @@
 from django.db import DataError
 from django.utils.translation import gettext as _
 
+from config import celery_app
 from core.collectors import articlemeta as articlemeta_collector
 from core.utils import date_utils
 from core.utils.request_utils import _get_user
-from document.services import articles as article_service
-from source.services import journals as journal_service
-
-from config import celery_app
+from document.services import article as article_service
+from source.models import Source
 
-from .common import _get_collection
+from document.tasks.common import _get_collection
 
 
 def load_documents_from_article_meta(
@@ -60,7 +59,7 @@ def load_documents_from_article_meta(
                 )
                 continue
 
-            source = journal_service.find_journal_source_by_issns(
+            source = Source.find_journal_by_issns(
                 collection_obj,
                 payload.get("code_title"),
             )
@@ -86,8 +85,8 @@ def load_documents_from_article_meta(
                     "Collection: %s, Source: %s, PIDv2: %s. Error: %s",
                     collection_obj,
                     source.source_id,
-                    payload.get('code'),
-                    exc
+                    payload.get("code"),
+                    exc,
                 )
                 continue
 
@@ -96,7 +95,12 @@ def load_documents_from_article_meta(
     return True
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Article Meta)"), timelimit=-1, queue="load")
+@celery_app.task(
+    bind=True,
+    name=_("[Metadata] Sync Documents (Article Meta)"),
+    timelimit=-1,
+    queue="load",
+)
 def task_load_documents_from_article_meta(
     self,
     from_date=None,
diff --git a/document/tasks/dataverse.py b/document/tasks/dataverse.py
index 15618a5..43d74de 100644
--- a/document/tasks/dataverse.py
+++ b/document/tasks/dataverse.py
@@ -3,14 +3,13 @@
 from django.db import DataError
 from django.utils.translation import gettext as _
 
+from config import celery_app
 from core.collectors import dataverse as dataverse_collector
 from core.utils import date_utils
 from core.utils.request_utils import _get_user
-from document.services import datasets as dataset_service
-
-from config import celery_app
+from document.services import dataset as dataset_service
 
-from .common import _get_collection
+from document.tasks.common import _get_collection
 
 
 def load_dataset_metadata_from_dataverse(
@@ -52,15 +51,20 @@ def load_dataset_metadata_from_dataverse(
             logging.error(
                 "Error saving Dataset Document. Collection: %s, PID: %s. Error: %s",
                 collection_obj,
-                payload.get('dataset_doi'),
-                exc
+                payload.get("dataset_doi"),
+                exc,
             )
             continue
 
     return True
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Dataverse)"), timelimit=-1, queue="load")
+@celery_app.task(
+    bind=True,
+    name=_("[Metadata] Sync Documents (Dataverse)"),
+    timelimit=-1,
+    queue="load",
+)
 def task_load_dataset_metadata_into_documents(
     self,
     from_date=None,
diff --git a/document/tasks/opac.py b/document/tasks/opac.py
index 5e1c81e..3256a73 100644
--- a/document/tasks/opac.py
+++ b/document/tasks/opac.py
@@ -3,15 +3,14 @@
 from django.db import DataError
 from django.utils.translation import gettext as _
 
+from config import celery_app
 from core.collectors import opac as opac_collector
 from core.utils import date_utils
 from core.utils.request_utils import _get_user
-from document.services import articles as article_service
-from source.services import journals as journal_service
-
-from config import celery_app
+from document.services import article as article_service
+from source.models import Source
 
-from .common import _get_collection
+from document.tasks.common import _get_collection
 
 
 def load_documents_from_opac(
@@ -45,7 +44,7 @@ def load_documents_from_opac(
         documents = response.get("documents") or {}
 
         for payload in documents.values():
-            source = journal_service.find_journal_source_by_acronym(
+            source = Source.find_journal_by_acronym(
                 collection_obj,
                 payload.get("journal_acronym"),
             )
@@ -71,8 +70,8 @@ def load_documents_from_opac(
                     "Collection: %s, Source: %s, PIDv2: %s. Error: %s",
                     collection_obj,
                     source.source_id,
-                    payload.get('pid_v2'),
-                    exc
+                    payload.get("pid_v2"),
+                    exc,
                 )
                 continue
 
@@ -83,7 +82,9 @@ def load_documents_from_opac(
     return True
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (OPAC)"), timelimit=-1, queue="load")
+@celery_app.task(
+    bind=True, name=_("[Metadata] Sync Documents (OPAC)"), timelimit=-1, queue="load"
+)
 def task_load_documents_from_opac(
     self,
     collection="scl",
diff --git a/document/tasks/pipeline.py b/document/tasks/pipeline.py
index 97bef7c..1073aa8 100644
--- a/document/tasks/pipeline.py
+++ b/document/tasks/pipeline.py
@@ -5,20 +5,24 @@
 
 from config import celery_app
 
-from .articlemeta import task_load_documents_from_article_meta
-from .dataverse import task_load_dataset_metadata_into_documents
-from .opac import task_load_documents_from_opac
-from .preprints import task_load_preprints_into_documents
-from .scielo_books import task_sync_documents_from_scielo_books
+from document.tasks.articlemeta import task_load_documents_from_article_meta
+from document.tasks.dataverse import task_load_dataset_metadata_into_documents
+from document.tasks.opac import task_load_documents_from_opac
+from document.tasks.preprints import task_load_preprints_into_documents
+from document.tasks.scielo_books import task_sync_documents_from_scielo_books
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Daily Sync Routine (Auto)"), queue="load")
+@celery_app.task(
+    bind=True, name=_("[Metadata] Daily Sync Routine (Auto)"), queue="load"
+)
 def task_daily_metadata_sync_pipeline(self):
     logging.info("Starting Daily Metadata Sync Pipeline")
-    group([
-        task_load_documents_from_article_meta.s(),
-        task_load_documents_from_opac.s(),
-        task_load_preprints_into_documents.s(),
-        task_load_dataset_metadata_into_documents.s(),
-        task_sync_documents_from_scielo_books.s(),
-    ]).apply_async()
+    group(
+        [
+            task_load_documents_from_article_meta.s(),
+            task_load_documents_from_opac.s(),
+            task_load_preprints_into_documents.s(),
+            task_load_dataset_metadata_into_documents.s(),
+            task_sync_documents_from_scielo_books.s(),
+        ]
+    ).apply_async()
diff --git a/document/tasks/preprints.py b/document/tasks/preprints.py
index ee63211..1f2d2e2 100644
--- a/document/tasks/preprints.py
+++ b/document/tasks/preprints.py
@@ -3,14 +3,13 @@
 from django.db import DataError
 from django.utils.translation import gettext as _
 
+from config import celery_app
 from core.collectors import preprints as preprints_collector
 from core.utils import date_utils
 from core.utils.request_utils import _get_user
-from document.services import preprints as preprint_service
-
-from config import celery_app
+from document.services import preprint as preprint_service
 
-from .common import _get_collection
+from document.tasks.common import _get_collection
 
 
 def load_preprints_from_preprints_api(
@@ -54,15 +53,20 @@ def load_preprints_from_preprints_api(
             logging.error(
                 "Error saving Preprint Document. Collection: %s, PID: %s. Error: %s",
                 collection_obj,
-                payload.get('pid_generic'),
-                exc
+                payload.get("pid_generic"),
+                exc,
             )
             continue
 
     return True
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (Preprints)"), timelimit=-1, queue="load")
+@celery_app.task(
+    bind=True,
+    name=_("[Metadata] Sync Documents (Preprints)"),
+    timelimit=-1,
+    queue="load",
+)
 def task_load_preprints_into_documents(
     self,
     from_date=None,
diff --git a/document/tasks/scielo_books.py b/document/tasks/scielo_books.py
index ddbd462..493d026 100644
--- a/document/tasks/scielo_books.py
+++ b/document/tasks/scielo_books.py
@@ -3,14 +3,16 @@
 from django.conf import settings
 from django.utils.translation import gettext as _
 
+from collection.models import Collection
+from config import celery_app
 from core.collectors import scielo_books as scielo_books_collector
 from core.utils.request_utils import _get_user
-from document.services import books as document_books_service
-from source.services import books as source_books_service
-
-from config import celery_app
+from document.models import Document
+from document.services import book as document_books_service
+from source.models import Source
+from source.services import book as source_books_service
 
-from .common import get_latest_scielo_books_last_seq
+from document.tasks.common import get_latest_scielo_books_last_seq
 
 
 def load_documents_from_scielo_books(
@@ -25,7 +27,7 @@ def load_documents_from_scielo_books(
 ):
     db_name = db_name or settings.SCIELO_BOOKS_DB_NAME
     limit = limit or settings.SCIELO_BOOKS_LIMIT
-    collection_obj = source_books_service.get_books_collection(collection)
+    collection_obj = Collection.objects.get(acron3=collection)
     monograph_cache = {}
 
     logging.info(
@@ -47,13 +49,13 @@ def load_documents_from_scielo_books(
         raw_id = change.get("id")
 
         if item["deleted"]:
-            delete_source = document_books_service.has_monograph_document_for_raw_id(
+            delete_source = Document.book_exists_for_raw_id(
                 collection_obj,
                 raw_id,
             )
-            document_books_service.delete_document_by_raw_id(collection_obj, raw_id)
+            Document.delete_documents_by_raw_id(collection_obj, raw_id)
             if delete_source:
-                source_books_service.delete_book_source(collection_obj, raw_id)
+                Source.delete_book_source_by_id(collection_obj, raw_id)
             continue
 
         payload = item["payload"] or {}
@@ -164,7 +166,9 @@ def sync_documents_from_scielo_books(
     )
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Manual)"), queue="load")
+@celery_app.task(
+    bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Manual)"), queue="load"
+)
 def task_load_documents_from_scielo_books(
     self,
     collection="books",
@@ -192,7 +196,11 @@ def task_load_documents_from_scielo_books(
     )
 
 
-@celery_app.task(bind=True, name=_("[Metadata] Sync Documents (SciELO Books - Incremental)"), queue="load")
+@celery_app.task(
+    bind=True,
+    name=_("[Metadata] Sync Documents (SciELO Books - Incremental)"),
+    queue="load",
+)
 def task_sync_documents_from_scielo_books(
     self,
     collection="books",
@@ -218,7 +226,9 @@ def task_sync_documents_from_scielo_books(
     )
 
 
-def _get_monograph_payload(payload, monograph_cache, base_url=None, db_name=None, headers=None):
+def _get_monograph_payload(
+    payload, monograph_cache, base_url=None, db_name=None, headers=None
+):
     monograph_id = payload.get("monograph")
     if not monograph_id:
         return None
diff --git a/metrics/exceptions.py b/document/tests/__init__.py
similarity index 100%
rename from metrics/exceptions.py
rename to document/tests/__init__.py
diff --git a/document/tests/test_models.py b/document/tests/test_models.py
new file mode 100644
index 0000000..475e7f4
--- /dev/null
+++ b/document/tests/test_models.py
@@ -0,0 +1,113 @@
+from django.test import TestCase
+
+from collection.models import Collection
+from document.models import Document
+from source.models import Source
+
+
+class DocumentIdentifierTests(TestCase):
+    def test_find_by_identifiers_searches_legacy_identifier_fields(self):
+        collection = Collection.objects.create(acron3="scl", acron2="sc")
+        document = Document.objects.create(
+            collection=collection,
+            document_type=Document.DOCUMENT_TYPE_ARTICLE,
+            document_id="doc-id",
+            pid_v2="pid-v2",
+            pid_v3="pid-v3",
+            pid_generic="pid-generic",
+        )
+
+        for identifier in ("doc-id", "pid-v2", "pid-v3", "pid-generic"):
+            self.assertEqual(
+                Document.find_by_identifiers(
+                    collection,
+                    Document.DOCUMENT_TYPE_ARTICLE,
+                    identifier,
+                ),
+                document,
+            )
+
+        self.assertIsNone(
+            Document.find_by_identifiers(
+                collection,
+                Document.DOCUMENT_TYPE_ARTICLE,
+                "missing",
+            )
+        )
+
+    def test_builds_book_pid_generic_values(self):
+        self.assertEqual(Document.build_book_pid_generic("abcd1"), "book:abcd1")
+        self.assertEqual(
+            Document.build_chapter_pid_generic("abcd1", "18"),
+            "book:abcd1/chapter:18",
+        )
+        self.assertIsNone(Document.build_book_pid_generic(""))
+        self.assertIsNone(Document.build_chapter_pid_generic("abcd1", ""))
+
+    def test_delete_documents_by_raw_id_deletes_collection_documents(self):
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        other_collection = Collection.objects.create(acron3="other", acron2="ot")
+        Document.objects.create(
+            collection=collection,
+            document_type=Document.DOCUMENT_TYPE_BOOK,
+            document_id="book:abcd1",
+            extra_data={"raw_id": "abcd1"},
+        )
+        Document.objects.create(
+            collection=other_collection,
+            document_type=Document.DOCUMENT_TYPE_BOOK,
+            document_id="book:abcd1",
+            extra_data={"raw_id": "abcd1"},
+        )
+
+        deleted_count, _ = Document.delete_documents_by_raw_id(collection, "abcd1")
+
+        self.assertEqual(deleted_count, 1)
+        self.assertFalse(
+            Document.objects.filter(collection=collection, extra_data__raw_id="abcd1")
+            .exists()
+        )
+        self.assertTrue(
+            Document.objects.filter(
+                collection=other_collection,
+                extra_data__raw_id="abcd1",
+            ).exists()
+        )
+
+
+class DocumentMetadataTests(TestCase):
+    def test_metadata_includes_source_context_and_legacy_identifiers(self):
+        collection = Collection.objects.create(acron3="scl", acron2="sc")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_JOURNAL,
+            source_id="1234-5678",
+            scielo_issn="1234-5678",
+            title="Test Journal",
+            identifiers={"scielo_issn": "1234-5678"},
+        )
+        Document.objects.create(
+            collection=collection,
+            source=source,
+            document_type=Document.DOCUMENT_TYPE_ARTICLE,
+            document_id="S123456782024000100001",
+            scielo_issn="1234-5678",
+            pid_v2="S123456782024000100001",
+            pid_v3="abc123",
+            title="Test Article",
+            identifiers={"doi": "10.1590/example"},
+            files={"pt": {"path": "/pdf/test.pdf"}},
+            default_lang="en",
+            text_langs=["en", "pt"],
+            publication_date="2024-01-15",
+            publication_year="2024",
+        )
+
+        metadata = list(Document.metadata(collection=collection))
+
+        self.assertEqual(len(metadata), 1)
+        self.assertEqual(metadata[0]["document_type"], Document.DOCUMENT_TYPE_ARTICLE)
+        self.assertEqual(metadata[0]["document_id"], "S123456782024000100001")
+        self.assertEqual(metadata[0]["source_type"], Source.SOURCE_TYPE_JOURNAL)
+        self.assertEqual(metadata[0]["source_id"], "1234-5678")
+        self.assertEqual(metadata[0]["scielo_issn"], "1234-5678")
diff --git a/document/tests.py b/document/tests/test_services.py
similarity index 57%
rename from document/tests.py
rename to document/tests/test_services.py
index 14d9bcd..e30c306 100644
--- a/document/tests.py
+++ b/document/tests/test_services.py
@@ -1,56 +1,66 @@
 from django.test import TestCase
-from unittest.mock import patch
 
 from collection.models import Collection
-from document import tasks as document_tasks
-from source.services import books as source_books_service
+from document.models import Document
+from document.services import article as article_service
+from document.services import book as books_service
+from document.services import dataset as dataset_service
+from document.services import preprint as preprint_service
 from source.models import Source
+from source.services import book as source_books_service
 
-from .models import Document
-from .services import articles as article_service
-from .services import books as books_service
-from .services import datasets as dataset_service
-from .services import preprints as preprint_service
 
-
-class DocumentMetadataTests(TestCase):
-    def test_metadata_includes_source_context_and_legacy_identifiers(self):
+class ArticleServiceTests(TestCase):
+    def test_articlemeta_and_opac_upsert_same_document(self):
         collection = Collection.objects.create(acron3="scl", acron2="sc")
         source = Source.objects.create(
             collection=collection,
             source_type=Source.SOURCE_TYPE_JOURNAL,
             source_id="1234-5678",
             scielo_issn="1234-5678",
+            acronym="testjou",
             title="Test Journal",
             identifiers={"scielo_issn": "1234-5678"},
         )
-        Document.objects.create(
+
+        first = article_service.upsert_article_document_from_articlemeta(
+            {
+                "code": "S123456782024000100001",
+                "title": "Article Title",
+                "pdfs": {"en": {"url": "/pdf/en.pdf"}},
+                "processing_date": "2024-02-10",
+                "publication_date": "2024-01-15",
+                "publication_year": "2024",
+                "default_language": "en",
+                "text_langs": ["en", "pt"],
+                "code_title": ["1234-5678"],
+            },
+            collection=collection,
+            source=source,
+        )
+        second = article_service.upsert_article_document_from_opac(
+            {
+                "pid_v2": "S123456782024000100001",
+                "pid_v3": "S1234-56782024000100001",
+                "title": "Article Title",
+                "journal_acronym": "testjou",
+                "publication_date": "2024-01-15",
+                "default_language": "en",
+                "text_langs": ["en", "pt"],
+            },
             collection=collection,
             source=source,
-            document_type=Document.DOCUMENT_TYPE_ARTICLE,
-            document_id="S123456782024000100001",
-            scielo_issn="1234-5678",
-            pid_v2="S123456782024000100001",
-            pid_v3="abc123",
-            title="Test Article",
-            identifiers={"doi": "10.1590/example"},
-            files={"pt": {"path": "/pdf/test.pdf"}},
-            default_lang="en",
-            text_langs=["en", "pt"],
-            publication_date="2024-01-15",
-            publication_year="2024",
         )
 
-        metadata = list(Document.metadata(collection=collection))
+        self.assertEqual(first.pk, second.pk)
+        self.assertEqual(Document.objects.count(), 1)
+        second.refresh_from_db()
+        self.assertEqual(second.pid_v3, "S1234-56782024000100001")
+        self.assertEqual(second.identifiers["journal_acronym"], "testjou")
 
-        self.assertEqual(len(metadata), 1)
-        self.assertEqual(metadata[0]["document_type"], Document.DOCUMENT_TYPE_ARTICLE)
-        self.assertEqual(metadata[0]["document_id"], "S123456782024000100001")
-        self.assertEqual(metadata[0]["source_type"], Source.SOURCE_TYPE_JOURNAL)
-        self.assertEqual(metadata[0]["source_id"], "1234-5678")
-        self.assertEqual(metadata[0]["scielo_issn"], "1234-5678")
 
-    def test_upsert_monograph_and_part_documents_from_books_payload(self):
+class BookServiceTests(TestCase):
+    def test_upsert_monograph_and_part_documents(self):
         collection = Collection.objects.create(acron3="books", acron2="bk")
         monograph_payload = {
             "TYPE": "Monograph",
@@ -74,13 +84,10 @@ def test_upsert_monograph_and_part_documents_from_books_payload(self):
         }
 
         source = source_books_service.upsert_monograph_source(
-            monograph_payload,
-            collection=collection,
+            monograph_payload, collection=collection
         )
         parent_document = books_service.upsert_monograph_document(
-            monograph_payload,
-            collection=collection,
-            source=source,
+            monograph_payload, collection=collection, source=source
         )
         chapter = books_service.upsert_part_document(
             books_service.enrich_part_payload(part_payload, monograph_payload),
@@ -98,53 +105,8 @@ def test_upsert_monograph_and_part_documents_from_books_payload(self):
         self.assertEqual(chapter.identifiers["book_id"], "abcd1")
         self.assertEqual(chapter.default_lang, "es")
 
-    def test_articlemeta_and_opac_upsert_same_document(self):
-        collection = Collection.objects.create(acron3="scl", acron2="sc")
-        source = Source.objects.create(
-            collection=collection,
-            source_type=Source.SOURCE_TYPE_JOURNAL,
-            source_id="1234-5678",
-            scielo_issn="1234-5678",
-            acronym="testjou",
-            title="Test Journal",
-            identifiers={"scielo_issn": "1234-5678"},
-        )
-
-        first = article_service.upsert_article_document_from_articlemeta(
-            {
-                "code": "S123456782024000100001",
-                "title": "Article Title",
-                "pdfs": {"en": {"url": "/pdf/en.pdf"}},
-                "processing_date": "2024-02-10",
-                "publication_date": "2024-01-15",
-                "publication_year": "2024",
-                "default_language": "en",
-                "text_langs": ["en", "pt"],
-                "code_title": ["1234-5678"],
-            },
-            collection=collection,
-            source=source,
-        )
-        second = article_service.upsert_article_document_from_opac(
-            {
-                "pid_v2": "S123456782024000100001",
-                "pid_v3": "S1234-56782024000100001",
-                "title": "Article Title",
-                "journal_acronym": "testjou",
-                "publication_date": "2024-01-15",
-                "default_language": "en",
-                "text_langs": ["en", "pt"],
-            },
-            collection=collection,
-            source=source,
-        )
-
-        self.assertEqual(first.pk, second.pk)
-        self.assertEqual(Document.objects.count(), 1)
-        second.refresh_from_db()
-        self.assertEqual(second.pid_v3, "S1234-56782024000100001")
-        self.assertEqual(second.identifiers["journal_acronym"], "testjou")
 
+class PreprintServiceTests(TestCase):
     def test_upsert_preprint_document_maps_metadata(self):
         collection = Collection.objects.create(acron3="preprints", acron2="pp")
 
@@ -165,6 +127,8 @@ def test_upsert_preprint_document_maps_metadata(self):
         self.assertEqual(document.pid_generic, "preprint/123")
         self.assertEqual(document.default_lang, "en")
 
+
+class DatasetServiceTests(TestCase):
     def test_upsert_dataset_document_accumulates_files(self):
         collection = Collection.objects.create(acron3="data", acron2="dt")
 
@@ -196,60 +160,3 @@ def test_upsert_dataset_document_accumulates_files(self):
         self.assertEqual(document.document_type, Document.DOCUMENT_TYPE_DATASET)
         self.assertEqual(document.document_id, "10.1234/dataset")
         self.assertEqual(set(document.files.keys()), {"1", "2"})
-
-
-class DocumentBooksSyncTests(TestCase):
-    def test_get_latest_scielo_books_last_seq_uses_documents_and_sources(self):
-        collection = Collection.objects.create(acron3="books", acron2="bk")
-        source = Source.objects.create(
-            collection=collection,
-            source_type=Source.SOURCE_TYPE_BOOK,
-            source_id="book-1",
-            title="Book 1",
-            extra_data={"last_seq": 120},
-        )
-        Document.objects.create(
-            collection=collection,
-            source=source,
-            document_type=Document.DOCUMENT_TYPE_BOOK,
-            document_id="book:book-1",
-            extra_data={"last_seq": "135"},
-        )
-
-        self.assertEqual(document_tasks.get_latest_scielo_books_last_seq("books"), 135)
-
-    def test_sync_documents_from_scielo_books_uses_computed_since(self):
-        collection = Collection.objects.create(acron3="books", acron2="bk")
-        source = Source.objects.create(
-            collection=collection,
-            source_type=Source.SOURCE_TYPE_BOOK,
-            source_id="book-1",
-            title="Book 1",
-            extra_data={"last_seq": 120},
-        )
-        Document.objects.create(
-            collection=collection,
-            source=source,
-            document_type=Document.DOCUMENT_TYPE_BOOK,
-            document_id="book:book-1",
-            extra_data={"last_seq": 135},
-        )
-
-        with patch("document.tasks.scielo_books.load_documents_from_scielo_books", return_value=True) as mocked:
-            result = document_tasks.sync_documents_from_scielo_books(
-                collection="books",
-                db_name="scielobooks_1a",
-                limit=500,
-            )
-
-        self.assertTrue(result)
-        mocked.assert_called_once_with(
-            collection="books",
-            db_name="scielobooks_1a",
-            since=135,
-            limit=500,
-            force_update=True,
-            headers=None,
-            base_url=None,
-            user=None,
-        )
diff --git a/document/tests/test_tasks.py b/document/tests/test_tasks.py
new file mode 100644
index 0000000..9f3a9a1
--- /dev/null
+++ b/document/tests/test_tasks.py
@@ -0,0 +1,72 @@
+from unittest.mock import patch
+
+from django.test import TestCase
+
+from collection.models import Collection
+from document.models import Document
+from document.tasks import common as document_tasks_common
+from document.tasks import scielo_books as document_tasks_scielo_books
+from source.models import Source
+
+
+class DocumentBooksSyncTests(TestCase):
+    def test_get_latest_scielo_books_last_seq_uses_documents_and_sources(self):
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_BOOK,
+            source_id="book-1",
+            title="Book 1",
+            extra_data={"last_seq": 120},
+        )
+        Document.objects.create(
+            collection=collection,
+            source=source,
+            document_type=Document.DOCUMENT_TYPE_BOOK,
+            document_id="book:book-1",
+            extra_data={"last_seq": "135"},
+        )
+
+        self.assertEqual(
+            document_tasks_common.get_latest_scielo_books_last_seq("books"),
+            135,
+        )
+
+    def test_sync_documents_from_scielo_books_uses_computed_since(self):
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        source = Source.objects.create(
+            collection=collection,
+            source_type=Source.SOURCE_TYPE_BOOK,
+            source_id="book-1",
+            title="Book 1",
+            extra_data={"last_seq": 120},
+        )
+        Document.objects.create(
+            collection=collection,
+            source=source,
+            document_type=Document.DOCUMENT_TYPE_BOOK,
+            document_id="book:book-1",
+            extra_data={"last_seq": 135},
+        )
+
+        with patch(
+            "document.tasks.scielo_books.load_documents_from_scielo_books",
+            return_value=True,
+        ) as mocked:
+            result = document_tasks_scielo_books.sync_documents_from_scielo_books(
+                collection="books",
+                db_name="scielobooks_1a",
+                limit=500,
+            )
+
+        self.assertTrue(result)
+        mocked.assert_called_once_with(
+            collection="books",
+            db_name="scielobooks_1a",
+            since=135,
+            limit=500,
+            force_update=True,
+            headers=None,
+            base_url=None,
+            user=None,
+        )
diff --git a/document/wagtail_hooks.py b/document/wagtail_hooks.py
index de291c9..51ef5ad 100644
--- a/document/wagtail_hooks.py
+++ b/document/wagtail_hooks.py
@@ -1,7 +1,7 @@
 from django.utils.translation import gettext_lazy as _
 from wagtail.snippets.views.snippets import SnippetViewSet
 
-from .models import Document
+from document.models import Document
 
 
 class DocumentSnippetViewSet(SnippetViewSet):
diff --git a/log_manager/admin.py b/log_manager/admin.py
index 8c38f3f..846f6b4 100644
--- a/log_manager/admin.py
+++ b/log_manager/admin.py
@@ -1,3 +1 @@
-from django.contrib import admin
-
 # Register your models here.
diff --git a/log_manager/choices.py b/log_manager/choices.py
index c6e461a..aa46a54 100644
--- a/log_manager/choices.py
+++ b/log_manager/choices.py
@@ -1,13 +1,12 @@
 from django.utils.translation import gettext_lazy as _
 
-
-LOG_FILE_STATUS_CREATED = 'CRE'
-LOG_FILE_STATUS_QUEUED = 'QUE'
-LOG_FILE_STATUS_PARSING = 'PAR'
-LOG_FILE_STATUS_PROCESSED = 'PRO'
-LOG_FILE_STATUS_ERROR = 'ERR'
-LOG_FILE_STATUS_INVALIDATED = 'INV'
-LOG_FILE_STATUS_IGNORED = 'IGN'
+LOG_FILE_STATUS_CREATED = "CRE"
+LOG_FILE_STATUS_QUEUED = "QUE"
+LOG_FILE_STATUS_PARSING = "PAR"
+LOG_FILE_STATUS_PROCESSED = "PRO"
+LOG_FILE_STATUS_ERROR = "ERR"
+LOG_FILE_STATUS_INVALIDATED = "INV"
+LOG_FILE_STATUS_IGNORED = "IGN"
 
 LOG_FILE_STATUS = [
     (LOG_FILE_STATUS_CREATED, _("Created")),
@@ -18,4 +17,3 @@
     (LOG_FILE_STATUS_INVALIDATED, _("Invalidated")),
     (LOG_FILE_STATUS_IGNORED, _("Ignored")),
 ]
-
diff --git a/log_manager/exceptions.py b/log_manager/exceptions.py
index 27d38e0..12feaa2 100644
--- a/log_manager/exceptions.py
+++ b/log_manager/exceptions.py
@@ -1,20 +1,26 @@
 class LogFileAlreadyExistsError(Exception):
     ...
 
+
 class InvalidDateFormatError(Exception):
     ...
 
+
 class InvalidTemporaReferenceError(Exception):
     ...
 
+
 class UndefinedApplicationConfigError(Exception):
     ...
 
+
 class UndefinedCollectionConfigError(Exception):
     ...
 
+
 class MultipleCollectionConfigError(Exception):
     ...
 
+
 class UnsupportedFileExtentionError(Exception):
-    ...
\ No newline at end of file
+    ...
diff --git a/log_manager/management/__init__.py b/log_manager/management/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/log_manager/management/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/log_manager/management/commands/__init__.py b/log_manager/management/commands/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/log_manager/management/commands/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/log_manager/management/commands/reset_log_catalog.py b/log_manager/management/commands/reset_log_catalog.py
deleted file mode 100644
index 5ded576..0000000
--- a/log_manager/management/commands/reset_log_catalog.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from django.core.management.base import BaseCommand
-from django.db import transaction
-
-from log_manager.models import LogFile
-from metrics.models import DailyMetricJob
-from metrics.services import daily_payloads
-from reports.models import MonthlyLogReport, WeeklyLogReport, YearlyLogReport
-from tracker.models import LogFileDiscardedLine
-
-
-class Command(BaseCommand):
-    help = (
-        "Clear the log catalog stored in the database, including derived parsing "
-        "records, daily metric payloads, and optionally reports, "
-        "while preserving the source log files on disk."
-    )
-
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "--collection",
-            action="append",
-            dest="collections",
-            help="Collection acronym to limit cleanup. Repeat the option for multiple collections.",
-        )
-        parser.add_argument(
-            "--reports",
-            action="store_true",
-            default=False,
-            help="Also clear Weekly/Monthly/Yearly log reports for the selected collections.",
-        )
-
-    def handle(self, *args, **options):
-        collections = options.get("collections") or []
-        clear_reports = options.get("reports")
-
-        log_files = LogFile.objects.all()
-        if collections:
-            log_files = log_files.filter(collection__acron3__in=collections)
-
-        log_file_ids = list(log_files.values_list("id", flat=True))
-        if not log_file_ids:
-            self.stdout.write(self.style.WARNING("No log catalog rows found for cleanup."))
-            return
-
-        daily_jobs = DailyMetricJob.objects.all()
-        if collections:
-            daily_jobs = daily_jobs.filter(collection__acron3__in=collections)
-        payload_paths = list(daily_jobs.exclude(storage_path="").values_list("storage_path", flat=True))
-
-        summary = {
-            "log_files": len(log_file_ids),
-            "discarded_lines": LogFileDiscardedLine.objects.filter(
-                log_file_id__in=log_file_ids
-            ).count(),
-            "daily_metric_jobs": daily_jobs.count(),
-        }
-
-        for storage_path in payload_paths:
-            daily_payloads.delete_payload(storage_path)
-
-        with transaction.atomic():
-            LogFileDiscardedLine.objects.filter(log_file_id__in=log_file_ids).delete()
-            daily_jobs.delete()
-            LogFile.objects.filter(id__in=log_file_ids).delete()
-
-            if clear_reports:
-                report_qs = WeeklyLogReport.objects.all()
-                m_qs = MonthlyLogReport.objects.all()
-                y_qs = YearlyLogReport.objects.all()
-                if collections:
-                    report_qs = report_qs.filter(collection__acron3__in=collections)
-                    m_qs = m_qs.filter(collection__acron3__in=collections)
-                    y_qs = y_qs.filter(collection__acron3__in=collections)
-                summary["weekly_reports"] = report_qs.count()
-                summary["monthly_reports"] = m_qs.count()
-                summary["yearly_reports"] = y_qs.count()
-                report_qs.delete()
-                m_qs.delete()
-                y_qs.delete()
-
-        msg = (
-            f"Cleared log catalog: "
-            f"{summary['log_files']} log files, "
-            f"{summary['discarded_lines']} discarded lines, "
-            f"{summary['daily_metric_jobs']} daily metric jobs."
-        )
-        if clear_reports:
-            msg += (
-                f" Also cleared reports: "
-                f"{summary['weekly_reports']} weekly, "
-                f"{summary['monthly_reports']} monthly, "
-                f"{summary['yearly_reports']} yearly."
-            )
-        self.stdout.write(self.style.SUCCESS(msg))
diff --git a/log_manager/models.py b/log_manager/models.py
index 6bf04d8..c6d9895 100644
--- a/log_manager/models.py
+++ b/log_manager/models.py
@@ -7,31 +7,36 @@
 from wagtailautocomplete.edit_handlers import AutocompletePanel
 
 from collection.models import Collection
+from core.utils.date_utils import get_date_obj
 
-from . import choices
+from log_manager import choices
 
 
 class LogFile(models.Model):
     created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
     updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True)
-    date = models.DateField(verbose_name=_("Date"), null=True, blank=True, db_index=True)
-    hash = models.CharField(_("Hash MD5"), max_length=32, null=True, blank=True, unique=True)
+    date = models.DateField(
+        verbose_name=_("Date"), null=True, blank=True, db_index=True
+    )
+    hash = models.CharField(
+        _("Hash MD5"), max_length=32, null=True, blank=True, unique=True
+    )
 
     path = models.CharField(_("Name"), max_length=255, null=False, blank=False)
 
     stat_result = models.JSONField(_("OS Stat Result"), null=False, blank=False)
 
     status = models.CharField(
-        _("Status"), 
-        choices=choices.LOG_FILE_STATUS, 
-        max_length=3, 
-        blank=False, 
+        _("Status"),
+        choices=choices.LOG_FILE_STATUS,
+        max_length=3,
+        blank=False,
         null=False,
     )
 
     validation = models.JSONField(
-        _("Validation"), 
-        null=True, 
+        _("Validation"),
+        null=True,
         blank=True,
         default=dict,
     )
@@ -64,26 +69,22 @@ class LogFile(models.Model):
     )
 
     panels = [
-        FieldPanel('hash'),
-        FieldPanel('date'),
-        FieldPanel('path'),
-        FieldPanel('stat_result'),
-        FieldPanel('status'),
-        FieldPanel('validation'),
-        FieldPanel('summary'),
-        FieldPanel('last_processed_line'),
-        FieldPanel('parse_heartbeat_at'),
-        AutocompletePanel('collection'),
+        FieldPanel("hash"),
+        FieldPanel("date"),
+        FieldPanel("path"),
+        FieldPanel("stat_result"),
+        FieldPanel("status"),
+        FieldPanel("validation"),
+        FieldPanel("summary"),
+        FieldPanel("last_processed_line"),
+        FieldPanel("parse_heartbeat_at"),
+        AutocompletePanel("collection"),
     ]
 
     class Meta:
         verbose_name = _("Log File")
         verbose_name_plural = _("Log Files")
 
-    @classmethod
-    def get(cls, hash):
-        return cls.objects.get(hash=hash)
-
     @classmethod
     def create_or_update(cls, collection, path, stat_result, hash, status=None):
         try:
@@ -97,17 +98,73 @@ def create_or_update(cls, collection, path, stat_result, hash, status=None):
                 },
             )
         except IntegrityError:
-            obj = cls.get(hash=hash)
+            obj = cls.objects.get(hash=hash)
             created = False
 
         if created:
-            logging.info(f'File {path} added to the database.')
+            logging.info(f"File {path} added to the database.")
         else:
             obj.updated = timezone.now()
             obj.save(update_fields=["updated"])
-            logging.info(f'File {path} already exists in the database.')
+            logging.info(f"File {path} already exists in the database.")
 
         return obj
-        
+
+    @classmethod
+    def for_collection_date(cls, collection, access_date, status_filters=None):
+        queryset = (
+            cls.objects.filter(
+                collection=collection,
+                date=access_date,
+            )
+            .select_related("collection")
+            .order_by("path", "hash")
+        )
+        if status_filters:
+            queryset = queryset.filter(status__in=status_filters)
+
+        return list(queryset)
+
+    @classmethod
+    def for_collection_date_hashes(cls, collection, access_date, log_hashes):
+        return list(
+            cls.objects.filter(
+                collection=collection,
+                date=access_date,
+                hash__in=log_hashes,
+            )
+            .select_related("collection")
+            .order_by("path", "hash")
+        )
+
+    @classmethod
+    def distinct_access_dates_for_parsing(
+        cls,
+        collection,
+        from_date,
+        until_date,
+        status_filters,
+        skip_hashes=None,
+    ):
+        date_queryset = (
+            cls.objects.filter(
+                status__in=status_filters,
+                collection=collection,
+                date__gte=from_date,
+                date__lte=until_date,
+            )
+            .exclude(hash__in=skip_hashes or [])
+            .values_list("date", flat=True)
+            .distinct()
+            .order_by("date")
+        )
+
+        access_dates = set()
+        for value in list(date_queryset):
+            access_date = value if hasattr(value, "isoformat") else get_date_obj(value)
+            if access_date and from_date <= access_date <= until_date:
+                access_dates.add(access_date)
+        return sorted(access_dates)
+
     def __str__(self):
-        return f'{self.path}'
+        return f"{self.path}"
diff --git a/log_manager/services/__init__.py b/log_manager/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/log_manager/services/catalog.py b/log_manager/services/catalog.py
new file mode 100644
index 0000000..fad59b3
--- /dev/null
+++ b/log_manager/services/catalog.py
@@ -0,0 +1,80 @@
+import logging
+import os
+
+from django.conf import settings
+
+from collection.models import Collection
+from core.utils import date_utils
+from log_manager import models, utils
+from log_manager_config import models as lmc_models
+
+
+def catalog_log_files_from_configured_directories(
+    collections=None,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+):
+    from_date_str, until_date_str = date_utils.get_date_range_str(
+        from_date, until_date, days_to_go_back
+    )
+    visible_dates = date_utils.get_date_objs_from_date_range(
+        from_date_str, until_date_str
+    )
+    supported_extensions = settings.SUPPORTED_LOGFILE_EXTENSIONS
+    if not supported_extensions:
+        logging.error("No supported log file extensions configured.")
+
+    for collection_code in collections or Collection.acron3_list():
+        collection = Collection.objects.get(acron3=collection_code)
+        directories = lmc_models.CollectionLogDirectory.objects.filter(
+            config__collection__acron3=collection_code,
+            active=True,
+        )
+        if not directories:
+            logging.error(
+                "No CollectionLogDirectory found for collection %s.", collection_code
+            )
+
+        for directory in directories:
+            _catalog_log_files_in_directory(
+                collection=collection,
+                directory_path=directory.path,
+                visible_dates=visible_dates,
+                supported_extensions=supported_extensions,
+            )
+
+
+def _catalog_log_files_in_directory(
+    collection,
+    directory_path,
+    visible_dates,
+    supported_extensions,
+):
+    for root, _sub_dirs, files in os.walk(directory_path):
+        for name in files:
+            _name, extension = os.path.splitext(name)
+            if extension.lower() not in supported_extensions:
+                continue
+
+            file_path = os.path.join(root, name)
+            file_stat = os.stat(file_path)
+            file_ctime = date_utils.get_date_obj_from_timestamp(file_stat.st_ctime)
+
+            logging.debug("Checking file %s with ctime %s.", file_path, file_ctime)
+            if file_ctime not in visible_dates:
+                continue
+
+            try:
+                models.LogFile.create_or_update(
+                    collection=collection,
+                    path=file_path,
+                    stat_result=file_stat,
+                    hash=utils.hash_file(file_path),
+                )
+            except Exception as exc:
+                logging.error(
+                    "Error cataloging file %s. Error: %s",
+                    file_path,
+                    exc,
+                )
diff --git a/log_manager/services/validation.py b/log_manager/services/validation.py
new file mode 100644
index 0000000..777ac47
--- /dev/null
+++ b/log_manager/services/validation.py
@@ -0,0 +1,199 @@
+import logging
+
+from collection.models import Collection
+from core.utils import date_utils
+from log_manager import choices, models, utils
+from log_manager_config import models as lmc_models
+
+LOGFILE_STAT_RESULT_CTIME_INDEX = 9
+
+
+def get_validation_candidate_hashes_by_collection(
+    collections=None,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    ignore_date=False,
+    revalidate=False,
+    status_list=None,
+):
+    collection_codes = collections or Collection.acron3_list()
+    logging.info("Validating log files for collections: %s.", collection_codes)
+
+    visible_dates = _get_validation_visible_dates(
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        ignore_date=ignore_date,
+    )
+    if visible_dates is None:
+        return None
+
+    status_filter = _get_validation_status_filter(
+        revalidate=revalidate,
+        status_list=status_list,
+    )
+
+    log_hashes_by_collection = {}
+    for collection_code in collection_codes:
+        log_hashes_by_collection[collection_code] = _get_validation_candidate_hashes(
+            collection_code=collection_code,
+            status_filter=status_filter,
+            visible_dates=visible_dates,
+            ignore_date=ignore_date,
+        )
+
+    return log_hashes_by_collection
+
+
+def _get_validation_visible_dates(
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    ignore_date=False,
+):
+    from_date_str, until_date_str = date_utils.get_date_range_str(
+        from_date,
+        until_date,
+        days_to_go_back,
+    )
+    visible_dates = date_utils.get_date_objs_from_date_range(
+        from_date_str,
+        until_date_str,
+    )
+
+    if ignore_date:
+        return visible_dates
+
+    if not visible_dates:
+        logging.warning("No visible dates found for log validation.")
+        return None
+
+    logging.info("Interval: %s to %s.", visible_dates[0], visible_dates[-1])
+    return visible_dates
+
+
+def _get_validation_status_filter(revalidate=False, status_list=None):
+    status_filter = [choices.LOG_FILE_STATUS_CREATED]
+
+    if revalidate:
+        status_filter += status_list or [
+            choices.LOG_FILE_STATUS_QUEUED,
+            choices.LOG_FILE_STATUS_INVALIDATED,
+            choices.LOG_FILE_STATUS_ERROR,
+        ]
+
+    return status_filter
+
+
+def _get_validation_candidate_hashes(
+    collection_code,
+    status_filter,
+    visible_dates,
+    ignore_date=False,
+):
+    log_hashes = []
+    log_files = models.LogFile.objects.filter(
+        status__in=status_filter,
+        collection__acron3=collection_code,
+    )
+
+    for log_file in log_files:
+        if not ignore_date and not _log_file_ctime_is_in_date_range(
+            log_file,
+            visible_dates,
+        ):
+            continue
+
+        log_hashes.append(log_file.hash)
+
+    return log_hashes
+
+
+def _log_file_ctime_is_in_date_range(log_file, visible_dates):
+    file_ctime = date_utils.get_date_obj_from_timestamp(
+        log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX]
+    )
+    return file_ctime in visible_dates
+
+
+def validate_log_file_and_update_status(log_file_hash):
+    log_file = models.LogFile.objects.get(hash=log_file_hash)
+    collection = log_file.collection.acron3
+    buffer_size, sample_size = _get_collection_validation_settings(collection)
+
+    logging.info("Validating log file %s.", log_file.path)
+    validation_result = utils.validate_file(
+        path=log_file.path,
+        buffer_size=buffer_size,
+        sample_size=sample_size,
+    )
+    _normalize_validation_result_for_storage(validation_result)
+
+    _update_log_file_with_validation_result(
+        log_file=log_file,
+        validation_result=validation_result,
+        buffer_size=buffer_size,
+        sample_size=sample_size,
+    )
+
+
+def _get_collection_validation_settings(
+    collection, default_buffer_size=2048, default_sample_size=0.1
+):
+    col_configs = lmc_models.LogManagerCollectionConfig.objects.filter(
+        collection__acron3=collection
+    ).first()
+
+    if not col_configs:
+        logging.warning(
+            "No LogManagerCollectionConfig found for collection %s. Using default values.",
+            collection,
+        )
+        return default_buffer_size, default_sample_size
+
+    return col_configs.buffer_size, col_configs.sample_size
+
+
+def _normalize_validation_result_for_storage(validation_result):
+    if "datetimes" in validation_result.get("content", {}).get("summary", {}):
+        del validation_result["content"]["summary"]["datetimes"]
+
+    if "probably_date" not in validation_result:
+        return
+
+    probably_date = validation_result["probably_date"]
+    if isinstance(probably_date, dict):
+        logging.error("Error determining probably_date: %s", probably_date.get("error"))
+        validation_result["probably_date"] = None
+        return
+
+    try:
+        validation_result["probably_date"] = date_utils.get_date_str(probably_date)
+    except (ValueError, AttributeError) as exc:
+        logging.error("Error serializing probably_date: %s", exc)
+        validation_result["probably_date"] = None
+
+
+def _update_log_file_with_validation_result(
+    log_file,
+    validation_result,
+    buffer_size,
+    sample_size,
+):
+    log_file.validation = validation_result
+    log_file.validation.update({"buffer_size": buffer_size, "sample_size": sample_size})
+
+    if validation_result.get("is_valid", {}).get("all", False):
+        log_file.date = validation_result.get("probably_date") or None
+        log_file.status = choices.LOG_FILE_STATUS_QUEUED
+    else:
+        log_file.status = choices.LOG_FILE_STATUS_INVALIDATED
+
+    logging.info(
+        "Log file %s (%s) has status %s.",
+        log_file.path,
+        log_file.collection.acron3,
+        log_file.status,
+    )
+    log_file.save()
diff --git a/log_manager/tasks.py b/log_manager/tasks.py
index 614106d..a04f22a 100644
--- a/log_manager/tasks.py
+++ b/log_manager/tasks.py
@@ -1,25 +1,15 @@
 import logging
-import os
 
 from celery import chord
-from django.conf import settings
 
-from collection.models import Collection
 from config import celery_app
-from core.utils import date_utils
+from config.collections import get_collection_parse_queue
 from core.utils.request_utils import _get_user
-from log_manager_config import models as lmc_models
-from metrics.services.resources import extract_celery_queue_name
-from metrics.tasks import task_parse_logs
+from log_manager.services import catalog, validation
+from metrics.tasks.log_parsing import task_enqueue_log_parsing_jobs
 
-from . import choices, models, utils
 
-LOGFILE_STAT_RESULT_CTIME_INDEX = 9
-
-
-@celery_app.task(
-    bind=True, name="[Log Pipeline] 1. Search Logs (Manual)", queue="load"
-)
+@celery_app.task(bind=True, name="[Log Pipeline] 1. Search Logs (Manual)", queue="load")
 def task_search_log_files(
     self,
     collections=None,
@@ -38,50 +28,12 @@ def task_search_log_files(
     """
     _get_user(self.request, username=username, user_id=user_id)
 
-    from_date_str, until_date_str = date_utils.get_date_range_str(
-        from_date, until_date, days_to_go_back
-    )
-    visible_dates = date_utils.get_date_objs_from_date_range(
-        from_date_str, until_date_str
+    catalog.catalog_log_files_from_configured_directories(
+        collections=collections,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
     )
-    supported_extensions = settings.SUPPORTED_LOGFILE_EXTENSIONS
-    if not supported_extensions:
-        logging.error("No supported log file extensions configured.")
-
-    for collection_code in collections or Collection.acron3_list():
-        collection = Collection.objects.get(acron3=collection_code)
-        directories = lmc_models.CollectionLogDirectory.objects.filter(
-            config__collection__acron3=collection_code,
-            active=True,
-        )
-        if not directories:
-            logging.error(
-                "No CollectionLogDirectory found for collection %s.", collection_code
-            )
-
-        for directory in directories:
-            for root, _sub_dirs, files in os.walk(directory.path):
-                for name in files:
-                    _name, extension = os.path.splitext(name)
-                    if extension.lower() not in supported_extensions:
-                        continue
-
-                    file_path = os.path.join(root, name)
-                    file_stat = os.stat(file_path)
-                    file_ctime = date_utils.get_date_obj_from_timestamp(
-                        file_stat.st_ctime
-                    )
-
-                    logging.debug(
-                        "Checking file %s with ctime %s.", file_path, file_ctime
-                    )
-                    if file_ctime in visible_dates:
-                        models.LogFile.create_or_update(
-                            collection=collection,
-                            path=file_path,
-                            stat_result=file_stat,
-                            hash=utils.hash_file(file_path),
-                        )
 
     if trigger_validation:
         task_validate_log_files.apply_async(
@@ -122,47 +74,23 @@ def task_validate_log_files(
     When trigger_parse=True, one parse orchestration task is enqueued per
     collection and routed to the proper parse_<size> queue.
     """
-    collection_codes = collections or Collection.acron3_list()
-    logging.info("Validating log files for collections: %s.", collection_codes)
-
-    from_date_str, until_date_str = date_utils.get_date_range_str(
-        from_date, until_date, days_to_go_back
-    )
-    visible_dates = date_utils.get_date_objs_from_date_range(
-        from_date_str, until_date_str
+    log_hashes_by_collection = validation.get_validation_candidate_hashes_by_collection(
+        collections=collections,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        ignore_date=ignore_date,
+        revalidate=revalidate,
+        status_list=status_list,
     )
-    if not ignore_date:
-        if not visible_dates:
-            logging.warning("No visible dates found for log validation.")
-            return
-        logging.info("Interval: %s to %s.", visible_dates[0], visible_dates[-1])
-
-    status_filter = [choices.LOG_FILE_STATUS_CREATED]
-    if revalidate:
-        status_filter += status_list or [
-            choices.LOG_FILE_STATUS_QUEUED,
-            choices.LOG_FILE_STATUS_INVALIDATED,
-            choices.LOG_FILE_STATUS_ERROR,
-        ]
-
-    tasks_by_collection = {}
-    for collection_code in collection_codes:
-        tasks_by_collection[collection_code] = []
-        log_files = models.LogFile.objects.filter(
-            status__in=status_filter,
-            collection__acron3=collection_code,
-        )
-        for log_file in log_files:
-            if not ignore_date:
-                file_ctime = date_utils.get_date_obj_from_timestamp(
-                    log_file.stat_result[LOGFILE_STAT_RESULT_CTIME_INDEX]
-                )
-                if file_ctime not in visible_dates:
-                    continue
+    if log_hashes_by_collection is None:
+        return
 
-            tasks_by_collection[collection_code].append(
-                task_validate_log_file.s(log_file.hash, user_id, username)
-            )
+    tasks_by_collection = _build_validation_tasks(
+        log_hashes_by_collection=log_hashes_by_collection,
+        user_id=user_id,
+        username=username,
+    )
 
     if trigger_parse:
         _enqueue_parse_after_validation(
@@ -189,33 +117,7 @@ def task_validate_log_files(
 def task_validate_log_file(self, log_file_hash, user_id=None, username=None):
     """Validate a single LogFile and update its status."""
     _get_user(self.request, username=username, user_id=user_id)
-    log_file = models.LogFile.objects.get(hash=log_file_hash)
-    collection = log_file.collection.acron3
-
-    buffer_size, sample_size = _fetch_validation_parameters(collection)
-
-    logging.info("Validating log file %s.", log_file.path)
-    val_result = utils.validate_file(
-        path=log_file.path, buffer_size=buffer_size, sample_size=sample_size
-    )
-    _clean_validation_result(val_result)
-
-    log_file.validation = val_result
-    log_file.validation.update({"buffer_size": buffer_size, "sample_size": sample_size})
-
-    if val_result.get("is_valid", {}).get("all", False):
-        log_file.date = val_result.get("probably_date") or None
-        log_file.status = choices.LOG_FILE_STATUS_QUEUED
-    else:
-        log_file.status = choices.LOG_FILE_STATUS_INVALIDATED
-
-    logging.info(
-        "Log file %s (%s) has status %s.",
-        log_file.path,
-        log_file.collection.acron3,
-        log_file.status,
-    )
-    log_file.save()
+    validation.validate_log_file_and_update_status(log_file_hash)
 
 
 @celery_app.task(bind=True, name="[Log Pipeline] Daily Routine (Auto)", queue="load")
@@ -227,6 +129,16 @@ def task_daily_log_ingestion_pipeline(self):
     task_search_log_files.apply_async(kwargs={"trigger_validation": True})
 
 
+def _build_validation_tasks(log_hashes_by_collection, user_id, username):
+    return {
+        collection_code: [
+            task_validate_log_file.s(log_file_hash, user_id, username)
+            for log_file_hash in log_hashes
+        ]
+        for collection_code, log_hashes in log_hashes_by_collection.items()
+    }
+
+
 def _enqueue_parse_after_validation(
     tasks_by_collection, from_date, until_date, days_to_go_back, user_id, username
 ):
@@ -243,7 +155,7 @@ def _enqueue_parse_after_validation(
                 )
             )
         else:
-            task_parse_logs.apply_async(
+            task_enqueue_log_parsing_jobs.apply_async(
                 **_build_parse_apply_kwargs(
                     collection_code,
                     from_date,
@@ -266,7 +178,7 @@ def _build_parse_signature(
         user_id,
         username,
     )
-    parse_callback = task_parse_logs.si(**apply_kwargs["kwargs"])
+    parse_callback = task_enqueue_log_parsing_jobs.si(**apply_kwargs["kwargs"])
     if apply_kwargs.get("queue"):
         parse_callback.set(queue=apply_kwargs["queue"])
     return parse_callback
@@ -276,7 +188,7 @@ def _build_parse_apply_kwargs(
     collection_code, from_date, until_date, days_to_go_back, user_id, username
 ):
     collections = [collection_code]
-    parse_queue = extract_celery_queue_name(collection_code)
+    parse_queue = get_collection_parse_queue(collection_code)
     apply_kwargs = {
         "kwargs": {
             "collections": collections,
@@ -290,38 +202,3 @@ def _build_parse_apply_kwargs(
         "queue": parse_queue,
     }
     return apply_kwargs
-
-
-def _fetch_validation_parameters(
-    collection, default_buffer_size=0.1, default_sample_size=2048
-):
-    col_configs = lmc_models.LogManagerCollectionConfig.objects.filter(
-        collection__acron3=collection
-    ).first()
-    if not col_configs:
-        logging.warning(
-            "No LogManagerCollectionConfig found for collection %s. Using default values.",
-            collection,
-        )
-        return default_buffer_size, default_sample_size
-    return col_configs.buffer_size, col_configs.sample_size
-
-
-def _clean_validation_result(val_result):
-    if "datetimes" in val_result.get("content", {}).get("summary", {}):
-        del val_result["content"]["summary"]["datetimes"]
-
-    if "probably_date" not in val_result:
-        return
-
-    probably_date = val_result["probably_date"]
-    if isinstance(probably_date, dict):
-        logging.error("Error determining probably_date: %s", probably_date.get("error"))
-        val_result["probably_date"] = None
-        return
-
-    try:
-        val_result["probably_date"] = date_utils.get_date_str(probably_date)
-    except (ValueError, AttributeError) as exc:
-        logging.error("Error serializing probably_date: %s", exc)
-        val_result["probably_date"] = None
diff --git a/log_manager/tests.py b/log_manager/tests.py
deleted file mode 100644
index 8832e25..0000000
--- a/log_manager/tests.py
+++ /dev/null
@@ -1,89 +0,0 @@
-from unittest.mock import patch
-
-from django.db import IntegrityError
-from django.test import TestCase
-
-from collection.models import Collection
-
-from . import choices, tasks
-from .models import LogFile
-
-
-class LogFileTests(TestCase):
-    def setUp(self):
-        self.collection = Collection.objects.create(acron3="books", acron2="bk")
-
-    def test_create_or_update_creates_log_file(self):
-        log_file = LogFile.create_or_update(
-            collection=self.collection,
-            path="/tmp/new.log.gz",
-            stat_result={"size": 10},
-            hash="1" * 32,
-        )
-
-        self.assertEqual(log_file.collection, self.collection)
-        self.assertEqual(log_file.path, "/tmp/new.log.gz")
-        self.assertEqual(log_file.status, choices.LOG_FILE_STATUS_CREATED)
-
-    def test_create_or_update_refetches_existing_log_after_integrity_error(self):
-        existing = LogFile.objects.create(
-            collection=self.collection,
-            path="/tmp/existing.log.gz",
-            stat_result={"size": 10},
-            hash="1" * 32,
-            status=choices.LOG_FILE_STATUS_CREATED,
-        )
-
-        with patch.object(LogFile.objects, "get_or_create", side_effect=IntegrityError):
-            log_file = LogFile.create_or_update(
-                collection=self.collection,
-                path="/tmp/existing.log.gz",
-                stat_result={"size": 10},
-                hash=existing.hash,
-            )
-
-        self.assertEqual(log_file.pk, existing.pk)
-
-
-class ValidateLogFilesTaskTests(TestCase):
-    def test_validate_log_files_returns_for_empty_visible_date_range(self):
-        with patch("log_manager.tasks.task_validate_log_file.s") as mocked_signature:
-            result = tasks.task_validate_log_files.run(
-                collections=["books"],
-                from_date="2024-02-02",
-                until_date="2024-02-01",
-            )
-
-        self.assertIsNone(result)
-        mocked_signature.assert_not_called()
-
-    def test_validate_log_files_routes_parse_callback_to_collection_parse_queue(self):
-        with patch("metrics.tasks.task_parse_logs.apply_async") as mocked_apply_async:
-            tasks.task_validate_log_files.run(
-                collections=["books"],
-                from_date="2024-02-01",
-                until_date="2024-02-02",
-                trigger_parse=True,
-            )
-
-        mocked_apply_async.assert_called_once()
-        self.assertEqual(mocked_apply_async.call_args.kwargs["queue"], "parse_small")
-        self.assertEqual(
-            mocked_apply_async.call_args.kwargs["kwargs"]["queue_name"],
-            "parse_small",
-        )
-
-    def test_validate_log_files_routes_each_collection_parse_to_its_queue(self):
-        with patch("metrics.tasks.task_parse_logs.apply_async") as mocked_apply_async:
-            tasks.task_validate_log_files.run(
-                collections=["books", "scl"],
-                from_date="2024-02-01",
-                until_date="2024-02-02",
-                trigger_parse=True,
-            )
-
-        calls = {
-            call.kwargs["kwargs"]["collections"][0]: call.kwargs["queue"]
-            for call in mocked_apply_async.call_args_list
-        }
-        self.assertEqual(calls, {"books": "parse_small", "scl": "parse_xlarge"})
diff --git a/log_manager/tests/__init__.py b/log_manager/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/log_manager/tests/test_models.py b/log_manager/tests/test_models.py
new file mode 100644
index 0000000..85eada8
--- /dev/null
+++ b/log_manager/tests/test_models.py
@@ -0,0 +1,44 @@
+from unittest.mock import patch
+
+from django.db import IntegrityError
+from django.test import TestCase
+
+from collection.models import Collection
+from log_manager import choices
+from log_manager.models import LogFile
+
+
+class LogFileModelTests(TestCase):
+    def setUp(self):
+        self.collection = Collection.objects.create(acron3="books", acron2="bk")
+
+    def test_create_or_update_creates_log_file(self):
+        log_file = LogFile.create_or_update(
+            collection=self.collection,
+            path="/tmp/new.log.gz",
+            stat_result={"size": 10},
+            hash="1" * 32,
+        )
+
+        self.assertEqual(log_file.collection, self.collection)
+        self.assertEqual(log_file.path, "/tmp/new.log.gz")
+        self.assertEqual(log_file.status, choices.LOG_FILE_STATUS_CREATED)
+
+    def test_create_or_update_refetches_existing_after_integrity_error(self):
+        existing = LogFile.objects.create(
+            collection=self.collection,
+            path="/tmp/existing.log.gz",
+            stat_result={"size": 10},
+            hash="1" * 32,
+            status=choices.LOG_FILE_STATUS_CREATED,
+        )
+
+        with patch.object(LogFile.objects, "get_or_create", side_effect=IntegrityError):
+            log_file = LogFile.create_or_update(
+                collection=self.collection,
+                path="/tmp/existing.log.gz",
+                stat_result={"size": 10},
+                hash=existing.hash,
+            )
+
+        self.assertEqual(log_file.pk, existing.pk)
diff --git a/log_manager/tests/test_tasks.py b/log_manager/tests/test_tasks.py
new file mode 100644
index 0000000..79d1db7
--- /dev/null
+++ b/log_manager/tests/test_tasks.py
@@ -0,0 +1,53 @@
+from unittest.mock import patch
+
+from django.test import TestCase
+
+from log_manager import tasks
+
+
+class ValidateLogFilesTaskTests(TestCase):
+    def test_returns_none_for_empty_date_range(self):
+        with patch("log_manager.tasks.task_validate_log_file.s") as mocked_signature:
+            result = tasks.task_validate_log_files.run(
+                collections=["books"],
+                from_date="2024-02-02",
+                until_date="2024-02-01",
+            )
+
+        self.assertIsNone(result)
+        mocked_signature.assert_not_called()
+
+    def test_routes_parse_callback_to_collection_queue(self):
+        with patch(
+            "log_manager.tasks.task_enqueue_log_parsing_jobs.apply_async"
+        ) as mocked_apply_async:
+            tasks.task_validate_log_files.run(
+                collections=["books"],
+                from_date="2024-02-01",
+                until_date="2024-02-02",
+                trigger_parse=True,
+            )
+
+        mocked_apply_async.assert_called_once()
+        self.assertEqual(mocked_apply_async.call_args.kwargs["queue"], "parse_small")
+        self.assertEqual(
+            mocked_apply_async.call_args.kwargs["kwargs"]["queue_name"],
+            "parse_small",
+        )
+
+    def test_routes_each_collection_to_its_queue(self):
+        with patch(
+            "log_manager.tasks.task_enqueue_log_parsing_jobs.apply_async"
+        ) as mocked_apply_async:
+            tasks.task_validate_log_files.run(
+                collections=["books", "scl"],
+                from_date="2024-02-01",
+                until_date="2024-02-02",
+                trigger_parse=True,
+            )
+
+        calls = {
+            call.kwargs["kwargs"]["collections"][0]: call.kwargs["queue"]
+            for call in mocked_apply_async.call_args_list
+        }
+        self.assertEqual(calls, {"books": "parse_small", "scl": "parse_xlarge"})
diff --git a/log_manager/tests/test_validation.py b/log_manager/tests/test_validation.py
new file mode 100644
index 0000000..957faf0
--- /dev/null
+++ b/log_manager/tests/test_validation.py
@@ -0,0 +1,90 @@
+import tempfile
+from datetime import date
+from unittest.mock import patch
+
+from django.test import TestCase
+
+from collection.models import Collection
+from log_manager import choices, utils
+from log_manager.models import LogFile
+from log_manager.services import validation
+
+
+class ValidationServiceTests(TestCase):
+    """Tests for ``utils.validate_file`` and the validation service helpers."""
+
+    def setUp(self):
+        self.collection = Collection.objects.create(acron3="scl", acron2="sc")
+
+    def test_validation_settings_defaults_match_validator_arguments(self):
+        """Settings resolved for the test collection are 2048 and 0.1."""
+        buffer_size, sample_size = validation._get_collection_validation_settings(
+            self.collection.acron3
+        )
+
+        self.assertEqual(buffer_size, 2048)
+        self.assertEqual(sample_size, 0.1)
+
+    @patch("log_manager.utils._is_empty_log_file", return_value=False)
+    @patch("log_manager.utils.validator.pipeline_validate")
+    @patch("log_manager.utils.validator.get_total_lines", return_value=10)
+    def test_validate_file_clamps_sample_size_to_avoid_zero_range_step(
+        self, mock_get_total_lines, mock_pipeline_validate, _mock_is_empty_log_file
+    ):
+        """A sample size above 1.0 reaches the validator as exactly 1.0."""
+        # The emptiness pre-check is patched so the outcome does not depend on
+        # whether /tmp/access.log happens to exist on the machine running tests.
+        utils.validate_file("/tmp/access.log", sample_size=2048, buffer_size=2048)
+
+        mock_get_total_lines.assert_called_once_with(
+            path="/tmp/access.log",
+            buffer_size=2048,
+        )
+        self.assertEqual(mock_pipeline_validate.call_args.kwargs["sample_size"], 1.0)
+
+    @patch("log_manager.utils.validator.validate_path_name", return_value={"all": True})
+    def test_validate_file_returns_invalid_result_for_empty_log(
+        self, mock_validate_path_name
+    ):
+        """An empty temporary file yields the invalid "File is empty" result."""
+        with tempfile.NamedTemporaryFile("w", encoding="utf-8") as tmp_file:
+            path = tmp_file.name
+
+            result = utils.validate_file(path, sample_size=1.0, buffer_size=2048)
+
+        self.assertFalse(result["is_valid"]["all"])
+        self.assertEqual(
+            result["content"]["summary"]["total_lines"]["error"],
+            "File is empty",
+        )
+        self.assertIsNone(result["probably_date"])
+
+    @patch("log_manager.services.validation.utils.validate_file")
+    def test_validate_log_file_updates_status_and_normalizes_result(
+        self, mock_validate_file
+    ):
+        """A valid result queues the log file and stores a normalized payload."""
+        log_file = LogFile.objects.create(
+            collection=self.collection,
+            path="/tmp/access.log",
+            stat_result={"size": 10},
+            hash="2" * 32,
+            status=choices.LOG_FILE_STATUS_CREATED,
+        )
+        mock_validate_file.return_value = {
+            "probably_date": date(2026, 5, 10),
+            "is_valid": {"all": True},
+            "content": {
+                "summary": {
+                    "datetimes": ["2026-05-10T00:00:00"],
+                },
+            },
+        }
+
+        validation.validate_log_file_and_update_status(log_file.hash)
+
+        log_file.refresh_from_db()
+        self.assertEqual(log_file.status, choices.LOG_FILE_STATUS_QUEUED)
+        self.assertEqual(log_file.date, date(2026, 5, 10))
+        self.assertNotIn("datetimes", log_file.validation["content"]["summary"])
+        self.assertEqual(log_file.validation["probably_date"], "2026-05-10")
diff --git a/log_manager/utils.py b/log_manager/utils.py
index c7dd2db..16a996f 100644
--- a/log_manager/utils.py
+++ b/log_manager/utils.py
@@ -2,7 +2,7 @@
 import hashlib
 from collections import deque
 
-from scielo_log_validator import validator
+from scielo_log_validator import exceptions, validator
 
 
 def hash_file(path, num_lines=500):
@@ -27,28 +27,121 @@ def hash_file(path, num_lines=500):
 
     opener = gzip.open if _is_gzip(path) else open
 
-    with opener(path, 'rb') as file:
-        first_lines = b''.join([file.readline() for _ in range(num_lines)])
+    with opener(path, "rb") as file:
+        first_lines = b"".join([file.readline() for _ in range(num_lines)])
         md5_hash.update(first_lines)
 
         tail = deque(maxlen=num_lines)
         for line in file:
             tail.append(line)
-        md5_hash.update(b''.join(tail))
+        md5_hash.update(b"".join(tail))
 
     return md5_hash.hexdigest()
 
 
 def _is_gzip(path):
-    with open(path, 'rb') as f:
-        return f.read(2) == b'\x1f\x8b'
+    with open(path, "rb") as f:
+        return f.read(2) == b"\x1f\x8b"
+
+
+def validate_file(
+    path,
+    sample_size=0.1,
+    buffer_size=2048,
+    days_delta=5,
+    apply_path_validation=True,
+    apply_content_validation=True,
+):
+    """Validate a log file through ``validator.pipeline_validate``.
+
+    With content validation enabled, an empty file short-circuits to an
+    invalid result and ``sample_size`` is first passed through
+    ``_safe_sample_size``; all arguments are otherwise forwarded unchanged.
+    """
+    if apply_content_validation:
+        if _is_empty_log_file(path, buffer_size):
+            return _empty_log_validation_result(path, apply_path_validation)
+
+        sample_size = _safe_sample_size(path, sample_size, buffer_size)
 
-def validate_file(path, sample_size=0.1, buffer_size=2048, days_delta=5, apply_path_validation=True, apply_content_validation=True):
     return validator.pipeline_validate(
-        path=path, 
+        path=path,
         sample_size=sample_size,
         buffer_size=buffer_size,
         days_delta=days_delta,
         apply_path_validation=apply_path_validation,
         apply_content_validation=apply_content_validation,
     )
+
+
+def _is_empty_log_file(path, buffer_size):
+    """Return True when reading the first line of ``path`` yields nothing.
+
+    Gzip files (detected by ``_is_gzip``) are read decompressed. ``buffer_size``
+    is accepted but not used. Files that cannot be read return False, so
+    ``validate_file`` carries on to the validator pipeline.
+    """
+    try:
+        opener = gzip.open if _is_gzip(path) else open
+        with opener(path, "rb") as file:
+            return file.readline() == b""
+    except (OSError, EOFError):
+        # gzip raises EOFError, which is not an OSError, when the compressed
+        # stream ends early (for example a file holding only the magic bytes).
+        return False
+
+
+def _empty_log_validation_result(path, apply_path_validation):
+    """Build the invalid validation result returned for an empty log file.
+
+    The ``path`` entry is only included when ``apply_path_validation`` is true.
+    """
+    result = {
+        "mode": {
+            "path_validation": apply_path_validation,
+            "content_validation": True,
+        },
+        "content": {"summary": {"total_lines": {"error": "File is empty"}}},
+        "is_valid": {"ips": False, "dates": False, "all": False},
+        "probably_date": None,
+    }
+
+    if apply_path_validation:
+        result["path"] = validator.validate_path_name(path)
+
+    return result
+
+
+def _safe_sample_size(path, sample_size, buffer_size):
+    """Adjust ``sample_size`` so ``int(total_lines * sample_size)`` is not zero.
+
+    Returns 1.0 when the file has at most one line or ``sample_size`` is 1.0 or
+    more. If ``validator.get_total_lines`` raises one of the handled validator
+    exceptions, ``sample_size`` is returned unchanged.
+    """
+    try:
+        total_lines = validator.get_total_lines(path=path, buffer_size=buffer_size)
+    except (
+        exceptions.TruncatedLogFileError,
+        exceptions.InvalidLogFileMimeError,
+        exceptions.LogFileIsEmptyError,
+    ):
+        return sample_size
+
+    if total_lines <= 1:
+        return 1.0
+
+    if sample_size >= 1.0:
+        return 1.0
+
+    if int(total_lines * sample_size) <= 0:
+        import math
+
+        # 1.0 / n may round down so that n * (1.0 / n) < 1.0 (n == 49 does),
+        # which would still truncate to zero; nudge it up until it does not.
+        minimum = 1.0 / total_lines
+        while int(total_lines * minimum) <= 0:
+            minimum = math.nextafter(minimum, 1.0)
+        return minimum
+
+    return sample_size
diff --git a/log_manager/views.py b/log_manager/views.py
index 91ea44a..60f00ef 100644
--- a/log_manager/views.py
+++ b/log_manager/views.py
@@ -1,3 +1 @@
-from django.shortcuts import render
-
 # Create your views here.
diff --git a/log_manager/wagtail_hooks.py b/log_manager/wagtail_hooks.py
index 1548ad3..cf7e908 100644
--- a/log_manager/wagtail_hooks.py
+++ b/log_manager/wagtail_hooks.py
@@ -1,13 +1,12 @@
 from django.utils.translation import gettext_lazy as _
-from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 from wagtail.snippets.models import register_snippet
+from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 
 from config.menu import get_menu_order
+from log_manager.models import LogFile
 from log_manager_config.wagtail_hooks import LogManagerCollectionConfigSnippetViewSet
 from metrics.wagtail_hooks import DailyMetricJobSnippetViewSet
 
-from log_manager.models import LogFile
-
 
 class LogFileSnippetViewSet(SnippetViewSet):
     model = LogFile
@@ -16,27 +15,27 @@ class LogFileSnippetViewSet(SnippetViewSet):
     menu_order = 500
     list_display = (
         "path",
-        "collection", 
-        "status", 
+        "collection",
+        "status",
         "date",
         "validation",
         "summary",
         "last_processed_line",
         "parse_heartbeat_at",
-        "hash"
+        "hash",
     )
     list_filter = ("status", "collection", "date")
     search_fields = ("path", "hash", "collection__acron3", "collection__main_name")
 
 
 class LogSnippetViewSetGroup(SnippetViewSetGroup):
-    menu_name = 'log_manager'
+    menu_name = "log_manager"
     menu_label = _("Log Manager")
     menu_icon = "folder-open-inverse"
     menu_order = get_menu_order("log_manager")
     items = (
         LogManagerCollectionConfigSnippetViewSet,
-        LogFileSnippetViewSet, 
+        LogFileSnippetViewSet,
         DailyMetricJobSnippetViewSet,
     )
 
diff --git a/log_manager_config/admin.py b/log_manager_config/admin.py
index 8c38f3f..846f6b4 100644
--- a/log_manager_config/admin.py
+++ b/log_manager_config/admin.py
@@ -1,3 +1 @@
-from django.contrib import admin
-
 # Register your models here.
diff --git a/log_manager_config/exceptions.py b/log_manager_config/exceptions.py
index 0a6a6a9..de5e309 100644
--- a/log_manager_config/exceptions.py
+++ b/log_manager_config/exceptions.py
@@ -1,8 +1,10 @@
 class UndefinedCollectionLogDirectoryError(Exception):
     ...
 
+
 class UndefinedCollectionEmailError(Exception):
     ...
 
+
 class UndefinedSupportedLogFile(Exception):
     ...
diff --git a/log_manager_config/models.py b/log_manager_config/models.py
index 35b5f90..f8fc106 100644
--- a/log_manager_config/models.py
+++ b/log_manager_config/models.py
@@ -3,39 +3,37 @@
 from django.db import models
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
-
-from modelcluster.models import ClusterableModel
 from modelcluster.fields import ParentalKey
-from wagtail.models import Orderable
+from modelcluster.models import ClusterableModel
 from wagtail.admin.panels import FieldPanel, InlinePanel
+from wagtail.models import Orderable
 from wagtailautocomplete.edit_handlers import AutocompletePanel
 
 from collection.models import Collection
 from core.models import CommonControlField
 
 
-
 class LogManagerCollectionConfig(ClusterableModel, CommonControlField):
     collection = models.OneToOneField(
         Collection,
-        verbose_name=_('Collection'),
+        verbose_name=_("Collection"),
         on_delete=models.CASCADE,
-        related_name="log_manager_config"
+        related_name="log_manager_config",
     )
     sample_size = models.FloatField(
-        verbose_name=_('Sample Size'),
+        verbose_name=_("Sample Size"),
         blank=False,
         null=False,
         default=0.1,
     )
     buffer_size = models.IntegerField(
-        verbose_name=_('Buffer Size'),
+        verbose_name=_("Buffer Size"),
         blank=False,
         null=False,
         default=2048,
     )
     expected_logs_per_day = models.IntegerField(
-        verbose_name=_('Expected Logs Per Day'),
+        verbose_name=_("Expected Logs Per Day"),
         default=1,
     )
 
@@ -49,17 +47,17 @@ class LogManagerCollectionConfig(ClusterableModel, CommonControlField):
     ]
 
     def __str__(self):
-        return f'{self.collection.acron3} Config'
+        return f"{self.collection.acron3} Config"
 
     class Meta:
-        verbose_name = _('Log Manager Collection Config')
-        verbose_name_plural = _('Log Manager Collection Configs')
+        verbose_name = _("Log Manager Collection Config")
+        verbose_name_plural = _("Log Manager Collection Configs")
 
     @classmethod
     def load(cls, data, user):
         for item in data:
             try:
-                collection = Collection.objects.get(acron3=item.get('acronym'))
+                collection = Collection.objects.get(acron3=item.get("acronym"))
             except Collection.DoesNotExist:
                 logging.warning(f'Collection {item.get("acronym")} not found.')
                 continue
@@ -67,9 +65,9 @@ def load(cls, data, user):
             cls.create_or_update(
                 user=user,
                 collection=collection,
-                sample_size=item.get('sample_size', 0.1),
-                buffer_size=item.get('buffer_size', 2048),
-                expected_logs_per_day=item.get('quantity', 1),
+                sample_size=item.get("sample_size", 0.1),
+                buffer_size=item.get("buffer_size", 2048),
+                expected_logs_per_day=item.get("quantity", 1),
             )
 
     @classmethod
@@ -85,58 +83,59 @@ def create_or_update(
         if created:
             obj.creator = user
             obj.created = timezone.now()
-        
+
         obj.updated_by = user
         obj.updated = timezone.now()
         obj.sample_size = sample_size
         obj.buffer_size = buffer_size
         obj.expected_logs_per_day = expected_logs_per_day
         obj.save()
-        logging.info(f'Config for {collection.acron3} updated.')
+        logging.info(f"Config for {collection.acron3} updated.")
         return obj
 
 
-
 class CollectionLogDirectory(Orderable, CommonControlField):
     config = ParentalKey(
-        'LogManagerCollectionConfig',
-        related_name='directories',
+        "LogManagerCollectionConfig",
+        related_name="directories",
         on_delete=models.CASCADE,
         null=True,
         blank=True,
     )
     path = models.CharField(
-        verbose_name=_('Path'),
-        max_length=255, 
-        blank=False, 
+        verbose_name=_("Path"),
+        max_length=255,
+        blank=False,
         null=False,
     )
     directory_name = models.CharField(
-        verbose_name=_('Directory Name'),
-        max_length=255, 
+        verbose_name=_("Directory Name"),
+        max_length=255,
         blank=True,
         null=True,
     )
     active = models.BooleanField(
-        verbose_name=_('Active'),
+        verbose_name=_("Active"),
         default=True,
     )
     translator_class = models.CharField(
-        verbose_name=_('URL Translator Class'),
+        verbose_name=_("URL Translator Class"),
         blank=False,
         null=False,
-        default='classic',
+        default="classic",
     )
 
     def __str__(self):
-        return f'{self.config.collection} - {self.path} - {self.directory_name}'
-    
+        return f"{self.config.collection} - {self.path} - {self.directory_name}"
+
     @classmethod
     def load(cls, data, user):
         for item in data:
             try:
-                collection = Collection.objects.get(acron3=item.get('acronym'))
-                config, _ = LogManagerCollectionConfig.objects.get_or_create(collection=collection)
+                collection = Collection.objects.get(acron3=item.get("acronym"))
+                config, _ = LogManagerCollectionConfig.objects.get_or_create(
+                    collection=collection
+                )
             except Collection.DoesNotExist:
                 logging.warning(f'Collection {item.get("acronym")} not found.')
                 continue
@@ -145,10 +144,10 @@ def load(cls, data, user):
             cls.create_or_update(
                 user=user,
                 config=config,
-                directory_name=item.get('directory_name'),
-                path=item.get('path'),
-                active=item.get('active', True),
-                translator_class=item.get('translator_class', 'classic'),
+                directory_name=item.get("directory_name"),
+                path=item.get("path"),
+                active=item.get("active", True),
+                translator_class=item.get("translator_class", "classic"),
             )
 
     @classmethod
@@ -159,7 +158,7 @@ def create_or_update(
         directory_name,
         path,
         active,
-        translator_class='classic',
+        translator_class="classic",
     ):
         try:
             obj = cls.objects.get(config=config, path=path)
@@ -168,66 +167,69 @@ def create_or_update(
             obj.creator = user
             obj.created = timezone.now()
             obj.config = config
-        
+
         obj.updated_by = user
         obj.updated = timezone.now()
         obj.directory_name = directory_name
         obj.path = path
         obj.active = active
-        obj.translator_class = translator_class or 'classic'
-     
+        obj.translator_class = translator_class or "classic"
+
         obj.save()
-        logging.info(f'{config.collection.acron3} - {directory_name} - {path}')
+        logging.info(f"{config.collection.acron3} - {directory_name} - {path}")
         return obj
 
     class Meta:
-        verbose_name = _('Collection Log Directory')
-        verbose_name_plural = _('Collection Log Directories')
+        verbose_name = _("Collection Log Directory")
+        verbose_name_plural = _("Collection Log Directories")
         constraints = [
-            models.UniqueConstraint(fields=['config', 'path'], name='unique_config_path')
+            models.UniqueConstraint(
+                fields=["config", "path"], name="unique_config_path"
+            )
         ]
 
 
-
 class CollectionEmail(Orderable, CommonControlField):
     config = ParentalKey(
-        'LogManagerCollectionConfig',
-        related_name='emails',
+        "LogManagerCollectionConfig",
+        related_name="emails",
         on_delete=models.CASCADE,
         null=True,
         blank=True,
     )
     name = models.CharField(
-        verbose_name=_('Name'),
-        max_length=255, 
+        verbose_name=_("Name"),
+        max_length=255,
         blank=True,
         null=True,
     )
     position = models.CharField(
-        verbose_name=_('Position'),
-        max_length=255, 
+        verbose_name=_("Position"),
+        max_length=255,
         blank=True,
         null=True,
     )
     email = models.EmailField(
-        verbose_name=_('E-mail'),
+        verbose_name=_("E-mail"),
         blank=False,
         null=False,
     )
     active = models.BooleanField(
-        verbose_name=_('Active'),
+        verbose_name=_("Active"),
         default=True,
     )
 
     def __str__(self):
-        return f'{self.email} - {self.name}'
-    
+        return f"{self.email} - {self.name}"
+
     @classmethod
     def load(cls, data, user):
         for item in data:
             try:
-                collection = Collection.objects.get(acron3=item.get('acronym'))
-                config, _ = LogManagerCollectionConfig.objects.get_or_create(collection=collection)
+                collection = Collection.objects.get(acron3=item.get("acronym"))
+                config, _ = LogManagerCollectionConfig.objects.get_or_create(
+                    collection=collection
+                )
             except Collection.DoesNotExist:
                 logging.warning(f'Collection {item.get("acronym")} not found.')
                 continue
@@ -236,10 +238,10 @@ def load(cls, data, user):
             cls.create_or_update(
                 user=user,
                 config=config,
-                email=item.get('e-mail'),
-                name=item.get('name'),
-                position=item.get('position'),
-                active=item.get('active', True),
+                email=item.get("e-mail"),
+                name=item.get("name"),
+                position=item.get("position"),
+                active=item.get("active", True),
             )
 
     @classmethod
@@ -262,19 +264,20 @@ def create_or_update(
             obj.email = email
 
         obj.updated_by = user
-        obj.updated = timezone.now()        
+        obj.updated = timezone.now()
         obj.name = name
         obj.position = position
         obj.active = active
-        
+
         obj.save()
-        logging.info(f'{config.collection.acron3} - {name} - {position} - {email}')
+        logging.info(f"{config.collection.acron3} - {name} - {position} - {email}")
         return obj
-    
+
     class Meta:
-        verbose_name = _('Collection Email')
-        verbose_name_plural = _('Collection Emails')
+        verbose_name = _("Collection Email")
+        verbose_name_plural = _("Collection Emails")
         constraints = [
-            models.UniqueConstraint(fields=['config', 'email'], name='unique_config_email')
+            models.UniqueConstraint(
+                fields=["config", "email"], name="unique_config_email"
+            )
         ]
-
diff --git a/log_manager_config/tasks.py b/log_manager_config/tasks.py
index 415dbf9..6c36df3 100644
--- a/log_manager_config/tasks.py
+++ b/log_manager_config/tasks.py
@@ -1,23 +1,27 @@
-from django.conf import settings
-
 from config import celery_app
-from config.collections import COLLECTION_SIZE_SAMPLE_MAP, LOG_MANAGER_SEED_DATA
+from config.collections import (
+    COLLECTION_SIZE_SAMPLE_MAP,
+    LOG_MANAGER_SEED_DATA,
+    get_collection_size,
+)
 from core.utils.request_utils import _get_user
 
-from . import models
+from log_manager_config import models
 
 
-@celery_app.task(bind=True, name='[Log Pipeline] Load Log Manager Settings (Seed)')
-def task_load_log_manager_collection_settings(self, data=None, user_id=None, username=None):
+@celery_app.task(bind=True, name="[Log Pipeline] Load Log Manager Settings (Seed)")
+def task_load_log_manager_collection_settings(
+    self, data=None, user_id=None, username=None
+):
     user = _get_user(self.request, username=username, user_id=user_id)
 
     if not data:
         data = LOG_MANAGER_SEED_DATA
 
         for i in data:
-            size = getattr(settings, 'COLLECTION_ACRON3_SIZE_MAP', {}).get(i['acronym'], 'small')
-            i['sample_size'] = COLLECTION_SIZE_SAMPLE_MAP.get(size, 1.0)
-            i['buffer_size'] = 2048
+            size = get_collection_size(i["acronym"])
+            i["sample_size"] = COLLECTION_SIZE_SAMPLE_MAP.get(size, 1.0)
+            i["buffer_size"] = 2048
 
     models.LogManagerCollectionConfig.load(data, user)
     models.CollectionLogDirectory.load(data, user)
diff --git a/log_manager_config/tests.py b/log_manager_config/tests.py
deleted file mode 100644
index 7ce503c..0000000
--- a/log_manager_config/tests.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.test import TestCase
-
-# Create your tests here.
diff --git a/log_manager_config/tests/__init__.py b/log_manager_config/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/log_manager_config/tests/test_models.py b/log_manager_config/tests/test_models.py
new file mode 100644
index 0000000..6c1dad3
--- /dev/null
+++ b/log_manager_config/tests/test_models.py
@@ -0,0 +1,93 @@
+from django.test import TestCase
+
+from collection.models import Collection
+from core.users.tests.factories import UserFactory
+from log_manager_config.models import CollectionLogDirectory, LogManagerCollectionConfig
+
+
+class LogManagerCollectionConfigTests(TestCase):
+    """Tests for ``LogManagerCollectionConfig.create_or_update``."""
+
+    def setUp(self):
+        self.user = UserFactory()
+        self.collection = Collection.objects.create(acron3="books", acron2="bk")
+
+    def test_create_or_update_creates_config(self):
+        """The first call stores the given values for the collection."""
+        config = LogManagerCollectionConfig.create_or_update(
+            user=self.user,
+            collection=self.collection,
+            sample_size=0.2,
+            buffer_size=4096,
+            expected_logs_per_day=3,
+        )
+
+        self.assertEqual(config.collection, self.collection)
+        self.assertEqual(config.sample_size, 0.2)
+        self.assertEqual(config.buffer_size, 4096)
+        self.assertEqual(config.expected_logs_per_day, 3)
+
+    def test_create_or_update_updates_existing(self):
+        """A second call for the same collection updates the single row."""
+        LogManagerCollectionConfig.create_or_update(
+            user=self.user,
+            collection=self.collection,
+            sample_size=0.1,
+            buffer_size=2048,
+            expected_logs_per_day=1,
+        )
+        config = LogManagerCollectionConfig.create_or_update(
+            user=self.user,
+            collection=self.collection,
+            sample_size=0.5,
+            buffer_size=8192,
+            expected_logs_per_day=5,
+        )
+
+        self.assertEqual(LogManagerCollectionConfig.objects.count(), 1)
+        self.assertEqual(config.sample_size, 0.5)
+        self.assertEqual(config.buffer_size, 8192)
+        self.assertEqual(config.expected_logs_per_day, 5)
+
+
+class CollectionLogDirectoryTests(TestCase):
+    """Tests for ``CollectionLogDirectory.create_or_update``."""
+
+    def setUp(self):
+        self.user = UserFactory()
+        self.collection = Collection.objects.create(acron3="scl", acron2="sc")
+        self.config = LogManagerCollectionConfig.create_or_update(
+            user=self.user,
+            collection=self.collection,
+            sample_size=0.1,
+            buffer_size=2048,
+            expected_logs_per_day=1,
+        )
+
+    def test_create_or_update_creates_directory(self):
+        """The created directory keeps the given config, path and translator."""
+        directory = CollectionLogDirectory.create_or_update(
+            user=self.user,
+            config=self.config,
+            directory_name="classic-logs",
+            path="/data/logs/scl",
+            active=True,
+            translator_class="classic",
+        )
+
+        self.assertEqual(directory.config, self.config)
+        self.assertEqual(directory.path, "/data/logs/scl")
+        self.assertEqual(directory.translator_class, "classic")
+
+    def test_translator_class_defaults_to_classic(self):
+        """Passing ``translator_class=None`` stores "classic"."""
+        directory = CollectionLogDirectory.create_or_update(
+            user=self.user,
+            config=self.config,
+            directory_name="logs",
+            path="/data/logs/scl",
+            active=True,
+            translator_class=None,
+        )
+
+        self.assertEqual(directory.translator_class, "classic")
diff --git a/log_manager_config/views.py b/log_manager_config/views.py
index 91ea44a..60f00ef 100644
--- a/log_manager_config/views.py
+++ b/log_manager_config/views.py
@@ -1,3 +1 @@
-from django.shortcuts import render
-
 # Create your views here.
diff --git a/log_manager_config/wagtail_hooks.py b/log_manager_config/wagtail_hooks.py
index f91c0b1..100fda3 100644
--- a/log_manager_config/wagtail_hooks.py
+++ b/log_manager_config/wagtail_hooks.py
@@ -3,6 +3,7 @@
 
 from log_manager_config.models import LogManagerCollectionConfig
 
+
 class LogManagerCollectionConfigSnippetViewSet(SnippetViewSet):
     model = LogManagerCollectionConfig
     menu_label = _("Log Manager Configurations")
@@ -16,9 +17,5 @@ class LogManagerCollectionConfigSnippetViewSet(SnippetViewSet):
         "expected_logs_per_day",
         "updated",
     )
-    list_filter = (
-        "collection",
-    )
-    search_fields = (
-        "collection__acron3",
-    )
+    list_filter = ("collection",)
+    search_fields = ("collection__acron3",)
diff --git a/metrics/admin.py b/metrics/admin.py
deleted file mode 100755
index 8c38f3f..0000000
--- a/metrics/admin.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.contrib import admin
-
-# Register your models here.
diff --git a/metrics/counter/__init__.py b/metrics/counter/__init__.py
index c9afd92..e69de29 100644
--- a/metrics/counter/__init__.py
+++ b/metrics/counter/__init__.py
@@ -1,22 +0,0 @@
-from .access import (
-    extract_item_access_data,
-    is_valid_item_access_data,
-    update_results_with_item_access_data,
-)
-from .documents import convert_raw_results_to_index_documents
-from .identifiers import (
-    generate_item_access_id,
-    generate_month_document_id,
-    generate_user_session_id,
-    generate_year_document_id,
-)
-from .parser import (
-    extract_date_from_validation_dict,
-    translator_class_name_to_obj,
-)
-from metrics.opensearch.names import (
-    extract_access_month,
-    extract_access_year,
-    generate_month_index_name,
-    generate_year_index_name,
-)
diff --git a/metrics/counter/access.py b/metrics/counter/access.py
deleted file mode 100644
index 65f9b27..0000000
--- a/metrics/counter/access.py
+++ /dev/null
@@ -1,533 +0,0 @@
-import re
-from urllib.parse import unquote, urlparse
-
-from scielo_usage_counter.values import (
-    CONTENT_TYPE_UNDEFINED,
-    DEFAULT_SCIELO_ISSN,
-    MEDIA_LANGUAGE_UNDEFINED,
-    MEDIA_FORMAT_UNDEFINED,
-)
-
-from core.utils.standardizer import (
-    standardize_language_code,
-    standardize_pid_generic,
-    standardize_pid_v2,
-    standardize_pid_v3,
-    standardize_year_of_publication,
-)
-from core.utils.date_utils import extract_minute_second_key, truncate_datetime_to_hour
-from metrics.counter.identifiers import (
-    generate_item_access_id,
-    generate_user_session_id,
-)
-
-
-def extract_item_access_data(collection_acron3: str, translated_url: dict):
-    if not translated_url or not isinstance(translated_url, dict):
-        return {}
-
-    source_type = _extract_source_type(collection_acron3, translated_url)
-    source_id = _extract_source_id(collection_acron3, translated_url, source_type)
-    scielo_issn = _extract_scielo_issn(translated_url, source_type, source_id)
-    document_type = _extract_document_type(
-        collection_acron3, translated_url, source_type
-    )
-    publication_year = _safe_standardize(
-        standardize_year_of_publication,
-        translated_url.get("year_of_publication"),
-    )
-    source_access_type = translated_url.get("source_access_type")
-
-    return {
-        "collection": collection_acron3,
-        "source_type": source_type,
-        "source_id": source_id,
-        "scielo_issn": scielo_issn,
-        "document_type": document_type,
-        "document_title": _extract_document_title(translated_url, document_type),
-        "pid_v2": _safe_standardize(standardize_pid_v2, translated_url.get("pid_v2")),
-        "pid_v3": _safe_standardize(standardize_pid_v3, translated_url.get("pid_v3")),
-        "pid_generic": _safe_standardize(
-            standardize_pid_generic,
-            translated_url.get("pid_generic"),
-        ),
-        "title_pid_generic": _safe_standardize(
-            standardize_pid_generic,
-            translated_url.get("title_pid_generic"),
-        ),
-        "segment_pid_generics": _standardize_pid_generic_list(
-            translated_url.get("segment_pid_generics"),
-        ),
-        "media_language": _safe_standardize(
-            standardize_language_code,
-            translated_url.get("media_language"),
-            default="un",
-        ),
-        "media_format": translated_url.get("media_format"),
-        "content_type": translated_url.get("content_type"),
-        "access_url": translated_url.get("access_url")
-        or translated_url.get("normalized_url"),
-        "publication_year": publication_year,
-        "counter_access_type": _counter_access_type(source_access_type),
-        "access_method": "Regular",
-        "source_main_title": _extract_source_title(translated_url),
-        "source_subject_area_capes": translated_url.get("source_subject_area_capes")
-        or translated_url.get("journal_subject_area_capes"),
-        "source_subject_area_wos": translated_url.get("source_subject_area_wos")
-        or translated_url.get("journal_subject_area_wos"),
-        "source_acronym": translated_url.get("source_acronym")
-        or translated_url.get("journal_acronym"),
-        "source_publisher_name": translated_url.get("source_publisher_name")
-        or translated_url.get("journal_publisher_name"),
-        "source_access_type": source_access_type,
-        "source_identifiers": _extract_source_identifiers(
-            translated_url, source_id, source_type
-        ),
-        "source_city": translated_url.get("source_city"),
-        "source_country": translated_url.get("source_country"),
-    }
-
-
-def is_valid_item_access_data(data: dict, utm=None, ignore_utm_validation=False):
-    if not isinstance(data, dict):
-        return False, {
-            "message": "Invalid data format. Expected a dictionary.",
-            "code": "invalid_format",
-        }
-
-    scielo_issn = data.get("scielo_issn")
-    source_id = data.get("source_id")
-    source_type = data.get("source_type")
-    document_type = data.get("document_type") or "article"
-    media_format = data.get("media_format")
-    media_language = data.get("media_language")
-    content_type = data.get("content_type")
-    pid_v2 = data.get("pid_v2")
-    pid_v3 = data.get("pid_v3")
-    pid_generic = data.get("pid_generic")
-    has_source_identity = bool(source_id) or bool(
-        scielo_issn and scielo_issn != DEFAULT_SCIELO_ISSN
-    )
-    has_media_language = bool(
-        media_language and media_language != MEDIA_LANGUAGE_UNDEFINED
-    )
-    has_pid = bool(pid_v2 or pid_v3 or pid_generic)
-
-    if not all(
-        [
-            media_format and media_format != MEDIA_FORMAT_UNDEFINED,
-            content_type and content_type != CONTENT_TYPE_UNDEFINED,
-            has_pid,
-        ]
-    ):
-        return False, {
-            "message": "Missing required fields in item access data.",
-            "code": "missing_fields",
-        }
-
-    if document_type in {"article", "book", "chapter"} and not has_media_language:
-        return False, {
-            "message": "Missing media language in item access data.",
-            "code": "missing_fields",
-        }
-
-    if document_type == "article" and not has_source_identity:
-        return False, {
-            "message": "Missing article source identity.",
-            "code": "missing_fields",
-        }
-
-    if document_type in {"book", "chapter"} and not source_id:
-        return False, {
-            "message": "Missing book source identity.",
-            "code": "missing_fields",
-        }
-
-    if document_type in {"preprint", "dataset"} and not pid_generic:
-        return False, {
-            "message": "Missing generic PID in item access data.",
-            "code": "missing_fields",
-        }
-
-    if utm and not ignore_utm_validation:
-        if (
-            source_type == "journal"
-            and scielo_issn
-            and scielo_issn != DEFAULT_SCIELO_ISSN
-            and not utm.is_valid_code(scielo_issn, utm.sources_metadata["issn_set"])
-        ):
-            return False, {
-                "message": f"Invalid scielo_issn: {scielo_issn}",
-                "code": "invalid_scielo_issn",
-            }
-
-        if (
-            source_type
-            and source_type != "journal"
-            and source_id
-            and source_id not in utm.sources_metadata.get("source_id_to_type", {})
-        ):
-            return False, {
-                "message": f"Invalid source_id: {source_id}",
-                "code": "invalid_source_id",
-            }
-
-        if pid_v2 and not utm.is_valid_code(pid_v2, utm.documents_metadata["pid_set"]):
-            return False, {
-                "message": f"Invalid pid_v2: {pid_v2}",
-                "code": "invalid_pid_v2",
-            }
-
-        if pid_v3 and not utm.is_valid_code(pid_v3, utm.documents_metadata["pid_set"]):
-            return False, {
-                "message": f"Invalid pid_v3: {pid_v3}",
-                "code": "invalid_pid_v3",
-            }
-
-        if pid_generic and not utm.is_valid_code(
-            pid_generic, utm.documents_metadata["pid_set"]
-        ):
-            return False, {
-                "message": f"Invalid pid_generic: {pid_generic}",
-                "code": "invalid_pid_generic",
-            }
-
-    return True, {"message": "Item access data is valid.", "code": "valid"}
-
-
-def update_results_with_item_access_data(
-    results: dict, item_access_data: dict, line: dict
-):
-    col_acron3 = item_access_data.get("collection")
-    source_key = (
-        item_access_data.get("source_id")
-        or item_access_data.get("scielo_issn")
-        or item_access_data.get("source_type")
-        or col_acron3
-    )
-    pid_v2 = item_access_data.get("pid_v2")
-    pid_v3 = item_access_data.get("pid_v3")
-    media_format = item_access_data.get("media_format")
-    content_language = item_access_data.get("media_language")
-    content_type = item_access_data.get("content_type")
-    access_url = item_access_data.get("access_url") or _normalize_access_url(
-        line.get("url")
-    )
-
-    client_name = line.get("client_name")
-    client_version = line.get("client_version")
-    local_datetime = line.get("local_datetime")
-    access_country_code = line.get("country_code")
-    ip_address = line.get("ip_address")
-
-    truncated_datetime = truncate_datetime_to_hour(local_datetime)
-    ms_key = extract_minute_second_key(local_datetime)
-    if truncated_datetime is None or ms_key is None:
-        raise ValueError("Invalid local_datetime in parsed log line.")
-
-    access_date = truncated_datetime.strftime("%Y-%m-%d")
-    access_year = access_date[:4]
-    access_month = access_date[:7].replace("-", "")
-
-    user_session_id = generate_user_session_id(
-        client_name,
-        client_version,
-        ip_address,
-        truncated_datetime,
-    )
-
-    for access_target in _iter_access_targets(item_access_data):
-        item_access_id = generate_item_access_id(
-            user_session_id=user_session_id,
-            col_acron3=col_acron3,
-            source_key=source_key,
-            pid_v2=pid_v2,
-            pid_v3=pid_v3,
-            pid_generic=access_target.get("pid_generic"),
-            content_language=content_language,
-            access_country_code=access_country_code,
-            media_format=media_format,
-            content_type=content_type,
-        )
-
-        if item_access_id not in results:
-            results[item_access_id] = {
-                "collection": col_acron3,
-                "source_key": source_key,
-                "document_type": access_target.get("document_type"),
-                "pid_v2": pid_v2,
-                "pid_v3": pid_v3,
-                "pid_generic": access_target.get("pid_generic"),
-                "document": _build_document(item_access_data),
-                "title_pid_generic": (
-                    item_access_data.get("title_pid_generic")
-                    or access_target.get("pid_generic")
-                ),
-                "user_session_id": user_session_id,
-                "click_timestamps": {ms_key: 0},
-                "click_timestamps_by_url": {},
-                "access_url": access_url,
-                "media_format": media_format,
-                "content_language": content_language,
-                "content_type": content_type,
-                "access_country_code": access_country_code,
-                "access_date": access_date,
-                "access_year": access_year,
-                "access_month": access_month,
-                "publication_year": item_access_data.get("publication_year"),
-                "counter_access_type": item_access_data.get("counter_access_type")
-                or "Open",
-                "access_method": item_access_data.get("access_method") or "Regular",
-                "source": {
-                    "source_type": item_access_data.get("source_type"),
-                    "source_id": item_access_data.get("source_id"),
-                    "scielo_issn": item_access_data.get("scielo_issn"),
-                    "main_title": item_access_data.get("source_main_title"),
-                    "identifiers": item_access_data.get("source_identifiers"),
-                    "access_type": item_access_data.get("source_access_type"),
-                    "city": item_access_data.get("source_city"),
-                    "country": item_access_data.get("source_country"),
-                    "subject_area_capes": item_access_data.get(
-                        "source_subject_area_capes"
-                    ),
-                    "subject_area_wos": item_access_data.get("source_subject_area_wos"),
-                    "acronym": item_access_data.get("source_acronym"),
-                    "publisher_name": item_access_data.get("source_publisher_name"),
-                },
-            }
-
-        if ms_key not in results[item_access_id]["click_timestamps"]:
-            results[item_access_id]["click_timestamps"][ms_key] = 0
-
-        results[item_access_id]["click_timestamps"][ms_key] += 1
-
-        access_url_key = access_url or _fallback_access_url_key(
-            access_target.get("pid_generic"),
-            media_format,
-            content_type,
-        )
-        timestamps_by_url = results[item_access_id].setdefault(
-            "click_timestamps_by_url", {}
-        )
-        url_timestamps = timestamps_by_url.setdefault(access_url_key, {})
-        if ms_key not in url_timestamps:
-            url_timestamps[ms_key] = 0
-        url_timestamps[ms_key] += 1
-
-
-def _extract_source_type(collection_acron3, translated_url):
-    source_type = translated_url.get("source_type")
-    if source_type:
-        return source_type
-
-    if collection_acron3 == "preprints":
-        return "preprint_server"
-
-    if collection_acron3 == "data":
-        return "data_repository"
-
-    if collection_acron3 == "books":
-        return "book"
-
-    if translated_url.get("book_id"):
-        return "book"
-
-    if (
-        translated_url.get("scielo_issn")
-        and translated_url.get("scielo_issn") != DEFAULT_SCIELO_ISSN
-    ):
-        return "journal"
-
-    if translated_url.get("journal_acronym") or translated_url.get(
-        "journal_main_title"
-    ):
-        return "journal"
-
-    return "other"
-
-
-def _extract_source_id(collection_acron3, translated_url, source_type):
-    source_id = translated_url.get("source_id")
-    if source_id:
-        return source_id
-
-    if source_type == "preprint_server":
-        return translated_url.get("preprint_server_id") or "scielo-preprints"
-
-    if source_type == "data_repository":
-        return translated_url.get("repository_id") or "scielo-data"
-
-    if source_type == "book":
-        return (
-            translated_url.get("book_id")
-            or _extract_book_id_from_pid(translated_url.get("title_pid_generic"))
-            or _extract_book_id_from_pid(translated_url.get("pid_generic"))
-        )
-
-    if source_type == "journal":
-        return translated_url.get("scielo_issn")
-
-    return None
-
-
-def _extract_scielo_issn(translated_url, source_type, source_id):
-    scielo_issn = translated_url.get("scielo_issn")
-    if scielo_issn:
-        return scielo_issn
-
-    if source_type == "journal" and source_id:
-        return source_id
-
-    if source_type in {"book", "other"} or translated_url.get("book_id"):
-        return DEFAULT_SCIELO_ISSN
-
-    return None
-
-
-def _extract_source_title(translated_url):
-    return (
-        translated_url.get("source_main_title")
-        or translated_url.get("journal_main_title")
-        or translated_url.get("book_title")
-    )
-
-
-def _extract_document_title(translated_url, document_type):
-    if document_type == "chapter":
-        return translated_url.get("chapter_title")
-    if document_type == "book":
-        return translated_url.get("book_title")
-    return (
-        translated_url.get("document_title")
-        or translated_url.get("article_title")
-        or translated_url.get("title")
-    )
-
-
-def _extract_document_type(collection_acron3, translated_url, source_type):
-    document_type = translated_url.get("document_type")
-    if document_type:
-        return document_type
-
-    if collection_acron3 == "preprints":
-        return "preprint"
-
-    if collection_acron3 == "data":
-        return "dataset"
-
-    if collection_acron3 == "books" or source_type == "book":
-        pid_generic = translated_url.get("pid_generic") or ""
-        if translated_url.get("chapter_id") or "/CHAPTER:" in pid_generic.upper():
-            return "chapter"
-        if translated_url.get("book_id"):
-            return "book"
-        return "book"
-
-    if source_type == "journal":
-        return "article"
-
-    return "article"
-
-
-def _extract_source_identifiers(translated_url, source_id, source_type):
-    identifiers = translated_url.get("source_identifiers")
-    if isinstance(identifiers, dict):
-        compact = {
-            key: value
-            for key, value in identifiers.items()
-            if value not in (None, "", [], {}, ())
-        }
-        if compact:
-            return compact
-
-    if source_type != "book":
-        return None
-
-    compact = {
-        "book_id": source_id or translated_url.get("book_id"),
-        "isbn": translated_url.get("isbn"),
-        "eisbn": translated_url.get("eisbn"),
-        "doi": translated_url.get("doi"),
-    }
-    compact = {
-        key: value
-        for key, value in compact.items()
-        if value not in (None, "", [], {}, ())
-    }
-    return compact or None
-
-
-def _extract_book_id_from_pid(value):
-    if not value:
-        return None
-    normalized = str(value).upper()
-    if not normalized.startswith("BOOK:"):
-        return None
-    return normalized.split("BOOK:", 1)[1].split("/", 1)[0] or None
-
-
-def _counter_access_type(source_access_type):
-    normalized = str(source_access_type or "").strip().lower()
-    if normalized == "commercial":
-        return "Controlled"
-    if normalized in {"free_to_read", "free-to-read", "free"}:
-        return "Free_To_Read"
-    return "Open"
-
-
-def _safe_standardize(func, value, default=""):
-    try:
-        return func(value)
-    except Exception:
-        return default
-
-
-def _standardize_pid_generic_list(values):
-    if not isinstance(values, (list, tuple, set)):
-        return []
-    items = []
-    for value in values:
-        item = _safe_standardize(standardize_pid_generic, value)
-        if item and item not in items:
-            items.append(item)
-    return items
-
-
-def _build_document(item_access_data):
-    title = item_access_data.get("document_title")
-    if not title:
-        return {}
-    return {"title": title}
-
-
-def _iter_access_targets(item_access_data):
-    return [
-        {
-            "pid_generic": item_access_data.get("pid_generic"),
-            "document_type": item_access_data.get("document_type"),
-        }
-    ]
-
-
-def _normalize_access_url(url):
-    if not url:
-        return None
-    parsed_url = urlparse(str(url).strip())
-    path = (
-        parsed_url.path if parsed_url.scheme or parsed_url.netloc else str(url).strip()
-    )
-    path = unquote(path or "")
-    path = path.split("?", 1)[0].split("#", 1)[0].split()[0]
-    path = re.sub(r"/+", "/", path)
-    path = path.rstrip(".,;:")
-    return path or None
-
-
-def _fallback_access_url_key(pid_generic, media_format, content_type):
-    return "|".join(
-        [
-            str(pid_generic or ""),
-            str(media_format or ""),
-            str(content_type or ""),
-        ]
-    )
diff --git a/metrics/counter/access/__init__.py b/metrics/counter/access/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/counter/access/accumulation.py b/metrics/counter/access/accumulation.py
new file mode 100644
index 0000000..bed2407
--- /dev/null
+++ b/metrics/counter/access/accumulation.py
@@ -0,0 +1,206 @@
+import re
+from urllib.parse import unquote, urlparse
+
+from core.utils.date_utils import extract_minute_second_key, truncate_datetime_to_hour
+
+
+def accumulate(results, counter_access, line):
+    """Fold one parsed log line into ``results`` as a click on its accessed item.
+
+    ``results`` is mutated in place: a record is stored the first time an item
+    access id is seen, then the click is counted under the minute/second key
+    both for the record as a whole and for the URL that was hit.
+
+    Raises:
+        ValueError: when ``local_datetime`` yields no hour or minute/second key.
+    """
+    access_url = counter_access.get("access_url")
+    if not access_url:
+        access_url = _normalized_access_path(line.get("url"))
+    counter_access = {**counter_access, "access_url": access_url}
+
+    local_datetime = line.get("local_datetime")
+    access_datetime = truncate_datetime_to_hour(local_datetime)
+    ms_key = extract_minute_second_key(local_datetime)
+    if access_datetime is None or ms_key is None:
+        raise ValueError("Invalid local_datetime in parsed log line.")
+
+    user_session_id = _generate_user_session_id(
+        line.get("client_name"),
+        line.get("client_version"),
+        line.get("ip_address"),
+        access_datetime,
+    )
+    raw_record = _build_record(
+        counter_access=counter_access,
+        line=line,
+        access_datetime=access_datetime,
+        minute_second_key=ms_key,
+        user_session_id=user_session_id,
+    )
+    item_access_id = raw_record["id"]
+    if item_access_id not in results:
+        results[item_access_id] = raw_record["data"]
+    record = results[item_access_id]
+    _increment_timestamp_count(record["click_timestamps"], ms_key)
+
+    # No URL: the per-URL counters are keyed by pid, media format, content type.
+    fallback_fields = ("pid_generic", "media_format", "content_type")
+    access_url_key = access_url or "|".join(
+        str(counter_access.get(field) or "") for field in fallback_fields
+    )
+    timestamps_by_url = record.setdefault("click_timestamps_by_url", {})
+    url_timestamps = timestamps_by_url.setdefault(access_url_key, {})
+    _increment_timestamp_count(url_timestamps, ms_key)
+
+
+def _build_record(
+    counter_access, line, access_datetime, minute_second_key, user_session_id
+):
+    """Return ``{"id": ..., "data": ...}`` describing one item access.
+
+    ``id`` is the item access id built from ``id_fields``; ``data`` is the
+    record kept under that id, starting with a zeroed click counter for
+    ``minute_second_key``.
+    """
+    get = counter_access.get
+    collection = get("collection")
+    access_date = access_datetime.strftime("%Y-%m-%d")
+    # Keys are the keyword names _generate_item_access_id expects.
+    id_fields = {
+        "user_session_id": user_session_id,
+        "col_acron3": collection,
+        "source_key": _source_key(counter_access, collection),
+        "pid_v2": get("pid_v2"),
+        "pid_v3": get("pid_v3"),
+        "pid_generic": get("pid_generic"),
+        "content_language": get("media_language"),
+        "access_country_code": line.get("country_code"),
+        "media_format": get("media_format"),
+        "content_type": get("content_type"),
+    }
+    # The record itself; keys stay in the order they are listed here.
+    data = {
+        "collection": collection,
+        "source_key": id_fields["source_key"],
+        "document_type": get("document_type"),
+        "pid_v2": id_fields["pid_v2"],
+        "pid_v3": id_fields["pid_v3"],
+        "pid_generic": id_fields["pid_generic"],
+        "document": _document_metadata(counter_access),
+        "title_pid_generic": get("title_pid_generic") or id_fields["pid_generic"],
+        "user_session_id": user_session_id,
+        "click_timestamps": {minute_second_key: 0},
+        "click_timestamps_by_url": {},
+        "access_url": get("access_url"),
+        "media_format": id_fields["media_format"],
+        "content_language": id_fields["content_language"],
+        "content_type": id_fields["content_type"],
+        "access_country_code": id_fields["access_country_code"],
+        "access_date": access_date,
+        "access_year": access_date[:4],
+        "access_month": access_date[:7].replace("-", ""),
+        "publication_year": get("publication_year"),
+        "counter_access_type": get("counter_access_type") or "Open",
+        "access_method": get("access_method") or "Regular",
+        "source": _source_metadata(counter_access),
+    }
+
+    return {"id": _generate_item_access_id(**id_fields), "data": data}
+
+
+def _increment_timestamp_count(timestamps, key):
+    """Add one click to ``timestamps[key]`` in place, starting from 0 if absent."""
+    # dict.get supplies the zero baseline for a key that was never counted.
+    timestamps[key] = timestamps.get(key, 0) + 1
+
+
+def _normalized_access_path(url):
+    """Return the decoded path of ``url`` without query/fragment, else None."""
+    text = str(url).strip() if url else ""
+    parsed_url = urlparse(text)
+    # A bare path has no scheme or host; use the whole text in that case.
+    path = parsed_url.path if parsed_url.scheme or parsed_url.netloc else text
+    path = unquote(path).split("?", 1)[0].split("#", 1)[0]
+    # split() is empty for a blank path (e.g. "http://host"); avoid IndexError.
+    path = (path.split() or [""])[0]
+    path = re.sub(r"/+", "/", path)
+    path = path.rstrip(".,;:")
+    return path or None
+
+
+def _generate_user_session_id(
+    client_name, client_version, ip_address, datetime, sep="|"
+):
+    """Join client name, client version, IP, date and hour into a session id.
+
+    ``datetime`` only needs ``strftime``; it contributes ``YYYY-MM-DD`` and the
+    two-digit hour as separate parts. Every part is passed through ``str``, so
+    a missing value shows up as the text ``"None"``.
+    """
+    # Date and hour are formatted separately and joined as two parts.
+    day = datetime.strftime("%Y-%m-%d")
+    hour = datetime.strftime("%H")
+
+    session_parts = (client_name, client_version, ip_address, day, hour)
+    return sep.join(map(str, session_parts))
+
+
+def _document_metadata(counter_access):
+    """Return ``{"title": ...}`` when a document title is set, else ``{}``."""
+    return {"title": title} if (title := counter_access.get("document_title")) else {}
+
+
+def _source_metadata(counter_access):  # builds the nested "source" entry
+    return {  # output key <- counter_access key; a missing key yields None
+        "source_type": counter_access.get("source_type"),
+        "source_id": counter_access.get("source_id"),
+        "scielo_issn": counter_access.get("scielo_issn"),
+        "main_title": counter_access.get("source_main_title"),
+        "identifiers": counter_access.get("source_identifiers"),
+        "access_type": counter_access.get("source_access_type"),
+        "city": counter_access.get("source_city"),
+        "country": counter_access.get("source_country"),
+        "subject_area_capes": counter_access.get("source_subject_area_capes"),
+        "subject_area_wos": counter_access.get("source_subject_area_wos"),
+        "acronym": counter_access.get("source_acronym"),
+        "publisher_name": counter_access.get("source_publisher_name"),
+    }
+
+
+def _source_key(counter_access, fallback):
+    """Return the first truthy of source_id, scielo_issn, source_type, fallback."""
+    for field in ("source_id", "scielo_issn", "source_type"):
+        candidate = counter_access.get(field)
+        if candidate:
+            return candidate
+    return fallback
+
+
+def _generate_item_access_id(
+    col_acron3,
+    source_key,
+    pid_v2,
+    pid_v3,
+    pid_generic,
+    user_session_id,
+    access_country_code,
+    content_language,
+    media_format,
+    content_type,
+    sep="|",
+):
+    """Join the fields that identify one item access into a single id string.
+
+    The parts appear in this order: collection, source key, pid_v2, pid_v3,
+    pid_generic, user session id, access country code, content language,
+    media format and content type.
+
+    Every part is normalised the same way (falsy -> "", otherwise ``str``), so
+    a missing collection or a non-string PID cannot make ``str.join`` raise
+    ``TypeError``.
+    """
+    identity = (col_acron3, source_key, pid_v2, pid_v3, pid_generic)
+    session = (user_session_id, access_country_code)
+    media = (content_language, media_format, content_type)
+    return sep.join(str(part or "") for part in identity + session + media)
diff --git a/metrics/counter/access/extraction.py b/metrics/counter/access/extraction.py
new file mode 100644
index 0000000..54ac429
--- /dev/null
+++ b/metrics/counter/access/extraction.py
@@ -0,0 +1,199 @@
+from scielo_usage_counter.values import DEFAULT_SCIELO_ISSN
+
+from core.utils.standardizer import (
+    standardize_language_code,
+    standardize_or_default,
+    standardize_pid_generic,
+    standardize_pid_generic_values,
+    standardize_pid_v2,
+    standardize_pid_v3,
+    standardize_year_of_publication,
+)
+
+
+def extract(collection_acron3, translated_url):  # translated URL -> flat access dict
+    if not translated_url or not isinstance(translated_url, dict):
+        return {}  # empty or non-dict input: nothing to extract
+
+    source_type = _resolve_source_type(collection_acron3, translated_url)
+    source_id = _resolve_source_id(translated_url, source_type)
+    scielo_issn = _resolve_scielo_issn(translated_url, source_type, source_id)
+    document_type = _resolve_document_type(
+        collection_acron3, translated_url, source_type
+    )
+    publication_year = standardize_or_default(
+        standardize_year_of_publication,
+        translated_url.get("year_of_publication"),
+    )
+    source_access_type = translated_url.get("source_access_type")
+
+    return {
+        "collection": collection_acron3,
+        "source_type": source_type,
+        "source_id": source_id,
+        "scielo_issn": scielo_issn,
+        "document_type": document_type,
+        "document_title": _resolve_document_title(document_type, translated_url),
+        "pid_v2": standardize_or_default(
+            standardize_pid_v2,
+            translated_url.get("pid_v2"),
+        ),
+        "pid_v3": standardize_or_default(
+            standardize_pid_v3,
+            translated_url.get("pid_v3"),
+        ),
+        "pid_generic": standardize_or_default(
+            standardize_pid_generic,
+            translated_url.get("pid_generic"),
+        ),
+        "title_pid_generic": standardize_or_default(
+            standardize_pid_generic,
+            translated_url.get("title_pid_generic"),
+        ),
+        "segment_pid_generics": standardize_pid_generic_values(
+            translated_url.get("segment_pid_generics"),
+        ),
+        "media_language": standardize_or_default(
+            standardize_language_code,
+            translated_url.get("media_language"),
+            default="un",  # presumably the value used when standardizing fails - confirm
+        ),
+        "media_format": translated_url.get("media_format"),
+        "content_type": translated_url.get("content_type"),
+        "access_url": translated_url.get("access_url")  # explicit URL wins
+        or translated_url.get("normalized_url"),  # else the normalized one
+        "publication_year": publication_year,
+        "counter_access_type": _resolve_counter_access_type(source_access_type),
+        "access_method": "Regular",  # fixed value; not derived from the URL
+        "source_main_title": (  # first truthy: source, journal, then book title
+            translated_url.get("source_main_title")
+            or translated_url.get("journal_main_title")
+            or translated_url.get("book_title")
+        ),
+        # For the next four fields the journal_* key backs up the source_* key.
+        "source_subject_area_capes": translated_url.get("source_subject_area_capes")
+        or translated_url.get("journal_subject_area_capes"),
+        "source_subject_area_wos": translated_url.get("source_subject_area_wos")
+        or translated_url.get("journal_subject_area_wos"),
+        "source_acronym": translated_url.get("source_acronym")
+        or translated_url.get("journal_acronym"),
+        "source_publisher_name": translated_url.get("source_publisher_name")
+        or translated_url.get("journal_publisher_name"),
+        "source_access_type": source_access_type,
+        "source_identifiers": _resolve_source_identifiers(translated_url),
+        "source_city": translated_url.get("source_city"),
+        "source_country": translated_url.get("source_country"),
+    }
+
+
+def _resolve_document_title(document_type, translated_url):
+    """Return the title stored under the key that matches ``document_type``.
+
+    Chapters and books have dedicated keys; other types use the generic ones.
+    """
+    get = translated_url.get
+    if document_type == "chapter":
+        return get("chapter_title")
+    if document_type == "book":
+        return get("book_title")
+    # First truthy generic title; falls through to whatever "title" holds.
+    return get("document_title") or get("article_title") or get("title")
+
+
+def _resolve_counter_access_type(source_access_type):
+    """Map a source access type (trimmed, case-insensitive) to its COUNTER label."""
+    labels = {
+        "commercial": "Controlled",
+        "free_to_read": "Free_To_Read",
+        "free-to-read": "Free_To_Read",
+        "free": "Free_To_Read",
+    }
+    return labels.get(str(source_access_type or "").strip().lower(), "Open")
+
+
+def _resolve_source_type(collection_acron3, translated_url):
+    """Decide which kind of source a translated URL belongs to.
+
+    Precedence: an explicit ``source_type``, then the ``preprints`` and
+    ``data`` collections, then journal evidence (a non-default ISSN, a
+    journal acronym or a journal title); ``"other"`` when nothing matches.
+    """
+    get = translated_url.get
+    explicit_type = get("source_type")
+    if explicit_type:
+        return explicit_type
+
+    if collection_acron3 == "preprints":
+        return "preprint_server"
+    if collection_acron3 == "data":
+        return "data_repository"
+
+    scielo_issn = get("scielo_issn")
+    has_real_issn = bool(scielo_issn) and scielo_issn != DEFAULT_SCIELO_ISSN
+    has_journal_fields = get("journal_acronym") or get("journal_main_title")
+    if has_real_issn or has_journal_fields:
+        return "journal"
+    return "other"
+
+
+def _resolve_source_id(translated_url, source_type):
+    """Return the source id, falling back to a per-``source_type`` default.
+
+    Preprint servers and data repositories fall back to their own id key and
+    then to a fixed name; journals fall back to the ISSN; others get ``None``.
+    """
+    get = translated_url.get
+    explicit_id = get("source_id")
+    if explicit_id:
+        return explicit_id
+    if source_type == "preprint_server":
+        return get("preprint_server_id") or "scielo-preprints"
+    if source_type == "data_repository":
+        return get("repository_id") or "scielo-data"
+    return get("scielo_issn") if source_type == "journal" else None
+
+
+def _resolve_scielo_issn(translated_url, source_type, source_id):
+    """Return the ISSN of the source, or a stand-in derived from its type.
+
+    Journals without an ISSN reuse ``source_id``; books and ``other`` sources
+    get ``DEFAULT_SCIELO_ISSN``; every remaining case yields ``None``.
+    """
+    explicit_issn = translated_url.get("scielo_issn")
+    if explicit_issn:
+        return explicit_issn
+    if source_type == "journal" and source_id:
+        return source_id
+    return DEFAULT_SCIELO_ISSN if source_type in {"book", "other"} else None
+
+
+def _resolve_document_type(collection_acron3, translated_url, source_type):
+    """Return the document type, defaulting by collection and then to article.
+
+    ``source_type`` does not affect the result: journals and every other
+    source without an explicit type end up as ``"article"``.
+    NOTE(review): "book"/"chapter" only come from ``translated_url`` itself;
+    confirm the translator always sets ``document_type`` for book URLs.
+    """
+    explicit_type = translated_url.get("document_type")
+    if explicit_type:
+        return explicit_type
+    if collection_acron3 == "preprints":
+        return "preprint"
+    # Journals and everything else share the article default.
+    return "dataset" if collection_acron3 == "data" else "article"
+
+
+def _resolve_source_identifiers(translated_url):
+    """Return the compacted ``source_identifiers`` dict, or None if not a dict."""
+    identifiers = translated_url.get("source_identifiers")
+    is_mapping = isinstance(identifiers, dict)
+    return _compact_identifiers(identifiers) if is_mapping else None
+
+
+def _compact_identifiers(identifiers):
+    """Drop None and empty str/list/dict/tuple values; None if nothing is left."""
+    empty_values = (None, "", [], {}, ())
+    items = identifiers.items()
+    # 0 and False are kept: only the listed empties compare equal.
+    kept = {name: value for name, value in items if value not in empty_values}
+    return kept or None
diff --git a/metrics/counter/access/validation.py b/metrics/counter/access/validation.py
new file mode 100644
index 0000000..673b6c1
--- /dev/null
+++ b/metrics/counter/access/validation.py
@@ -0,0 +1,113 @@
+from scielo_usage_counter.values import (
+    CONTENT_TYPE_UNDEFINED,
+    DEFAULT_SCIELO_ISSN,
+    MEDIA_FORMAT_UNDEFINED,
+    MEDIA_LANGUAGE_UNDEFINED,
+)
+
+
+def is_valid(data, utm=None, ignore_utm_validation=False):  # Validate one item-access dict; returns (ok, {"message", "code"}).
+    if not isinstance(data, dict):  # anything but a dict is rejected up front
+        return False, {
+            "message": "Invalid data format. Expected a dictionary.",
+            "code": "invalid_format",
+        }
+
+    scielo_issn = data.get("scielo_issn")
+    source_id = data.get("source_id")
+    source_type = data.get("source_type")
+    document_type = data.get("document_type") or "article"  # a missing type is validated as an article
+    media_format = data.get("media_format")
+    media_language = data.get("media_language")
+    content_type = data.get("content_type")
+    pid_v2 = data.get("pid_v2")
+    pid_v3 = data.get("pid_v3")
+    pid_generic = data.get("pid_generic")
+    has_source_identity = bool(source_id) or bool(  # a source_id, or an ISSN other than DEFAULT_SCIELO_ISSN
+        scielo_issn and scielo_issn != DEFAULT_SCIELO_ISSN
+    )
+    has_media_language = bool(  # the "undefined" marker counts as missing
+        media_language and media_language != MEDIA_LANGUAGE_UNDEFINED
+    )
+    has_pid = bool(pid_v2 or pid_v3 or pid_generic)  # any one PID kind is enough
+
+    if not all(  # required for every document type: media format, content type, a PID
+        [
+            media_format and media_format != MEDIA_FORMAT_UNDEFINED,
+            content_type and content_type != CONTENT_TYPE_UNDEFINED,
+            has_pid,
+        ]
+    ):
+        return False, {
+            "message": "Missing required fields in item access data.",
+            "code": "missing_fields",
+        }
+
+    if document_type in {"article", "book", "chapter"} and not has_media_language:  # other types may omit the language
+        return False, {
+            "message": "Missing media language in item access data.",
+            "code": "missing_fields",
+        }
+
+    if document_type == "article" and not has_source_identity:
+        return False, {
+            "message": "Missing article source identity.",
+            "code": "missing_fields",
+        }
+
+    if document_type in {"book", "chapter"} and not source_id:  # books need a source_id; an ISSN alone does not count
+        return False, {
+            "message": "Missing book source identity.",
+            "code": "missing_fields",
+        }
+
+    if document_type in {"preprint", "dataset"} and not pid_generic:  # pid_v2/pid_v3 alone are not accepted here
+        return False, {
+            "message": "Missing generic PID in item access data.",
+            "code": "missing_fields",
+        }
+
+    if utm and not ignore_utm_validation:  # metadata cross-checks run only when a truthy utm is supplied
+        if (  # journal ISSN must be accepted by utm; the default ISSN is exempt
+            source_type == "journal"
+            and scielo_issn
+            and scielo_issn != DEFAULT_SCIELO_ISSN
+            and not utm.is_valid_code(scielo_issn, utm.sources_metadata["issn_set"])
+        ):
+            return False, {
+                "message": f"Invalid scielo_issn: {scielo_issn}",
+                "code": "invalid_scielo_issn",
+            }
+
+        if (  # non-journal source ids must appear in utm's source_id_to_type mapping
+            source_type
+            and source_type != "journal"
+            and source_id
+            and source_id not in utm.sources_metadata.get("source_id_to_type", {})
+        ):
+            return False, {
+                "message": f"Invalid source_id: {source_id}",
+                "code": "invalid_source_id",
+            }
+
+        if pid_v2 and not utm.is_valid_code(pid_v2, utm.documents_metadata["pid_set"]):  # every PID kind is checked against the same pid_set
+            return False, {
+                "message": f"Invalid pid_v2: {pid_v2}",
+                "code": "invalid_pid_v2",
+            }
+
+        if pid_v3 and not utm.is_valid_code(pid_v3, utm.documents_metadata["pid_set"]):
+            return False, {
+                "message": f"Invalid pid_v3: {pid_v3}",
+                "code": "invalid_pid_v3",
+            }
+
+        if pid_generic and not utm.is_valid_code(
+            pid_generic, utm.documents_metadata["pid_set"]
+        ):
+            return False, {
+                "message": f"Invalid pid_generic: {pid_generic}",
+                "code": "invalid_pid_generic",
+            }
+
+    return True, {"message": "Item access data is valid.", "code": "valid"}  # all applicable checks passed
diff --git a/metrics/counter/aggregation.py b/metrics/counter/aggregation.py
deleted file mode 100644
index d047e7a..0000000
--- a/metrics/counter/aggregation.py
+++ /dev/null
@@ -1,124 +0,0 @@
-from scielo_usage_counter.counter import get_valid_clicks, is_request
-
-
-def apply_unique_metrics(
-    document,
-    unique_state,
-    scope,
-    document_id,
-    user_session_id,
-    is_request_event,
-):
-    if not user_session_id:
-        return
-
-    inv_bucket = unique_state[f"{scope}_investigations"]
-    inv_key = (document_id, user_session_id)
-    add_investigation = inv_key not in inv_bucket
-    if add_investigation:
-        inv_bucket.add(inv_key)
-
-    add_request = False
-    if is_request_event:
-        req_bucket = unique_state[f"{scope}_requests"]
-        req_key = (document_id, user_session_id)
-        add_request = req_key not in req_bucket
-        if add_request:
-            req_bucket.add(req_key)
-
-    increment_document_uniques(
-        document=document,
-        add_investigation=add_investigation,
-        add_request=add_request,
-    )
-
-
-def increment_document_totals(document, click_timestamps, content_type, click_timestamps_by_url=None):
-    number_of_clicks = _count_valid_clicks(
-        click_timestamps=click_timestamps,
-        click_timestamps_by_url=click_timestamps_by_url,
-    )
-
-    document["total_investigations"] += number_of_clicks
-    if is_request(content_type):
-        document["total_requests"] += number_of_clicks
-
-    if "daily_metrics" in document:
-        day_key = list(document["daily_metrics"].keys())[0]
-        document["daily_metrics"][day_key]["total_investigations"] += number_of_clicks
-        if is_request(content_type):
-            document["daily_metrics"][day_key]["total_requests"] += number_of_clicks
-
-
-def _count_valid_clicks(click_timestamps, click_timestamps_by_url=None):
-    if isinstance(click_timestamps_by_url, dict) and click_timestamps_by_url:
-        return sum(
-            get_valid_clicks(timestamps or {})
-            for timestamps in click_timestamps_by_url.values()
-        )
-    return get_valid_clicks(click_timestamps or {})
-
-
-def increment_document_uniques(document, add_investigation=False, add_request=False):
-    if add_investigation:
-        document["unique_investigations"] += 1
-    if add_request:
-        document["unique_requests"] += 1
-
-    if "daily_metrics" in document:
-        day_key = list(document["daily_metrics"].keys())[0]
-        if add_investigation:
-            document["daily_metrics"][day_key]["unique_investigations"] += 1
-        if add_request:
-            document["daily_metrics"][day_key]["unique_requests"] += 1
-
-
-def counter_data_type(document_type):
-    if document_type == "dataset":
-        return "Dataset"
-    if document_type in {"article", "preprint"}:
-        return "Article"
-    if document_type == "book":
-        return "Book"
-    if document_type == "chapter":
-        return "Book_Segment"
-    return "Other"
-
-
-def parent_data_type(document_type, source_type=None):
-    if document_type == "chapter":
-        return "Book"
-    if document_type == "article" and source_type == "journal":
-        return "Journal"
-    return None
-
-
-def article_version(document_type):
-    if document_type == "preprint":
-        return "Preprint"
-    return None
-
-
-def should_create_book_item_document(value):
-    if not value.get("pid_generic"):
-        return False
-    if value.get("document_type") == "book" and not is_request(value.get("content_type")):
-        return False
-    return True
-
-
-def extract_title_pid_generic(value, fallback=None):
-    title_pid_generic = value.get("title_pid_generic")
-    if title_pid_generic:
-        return title_pid_generic
-
-    pid_generic = value.get("pid_generic")
-    if "/CHAPTER:" in (pid_generic or "").upper():
-        return pid_generic.upper().split("/CHAPTER:")[0]
-
-    source = value.get("source") or {}
-    source_id = source.get("source_id")
-    if source_id:
-        return f"BOOK:{str(source_id).upper()}"
-
-    return fallback
diff --git a/metrics/counter/documents.py b/metrics/counter/documents.py
deleted file mode 100644
index e13c0cf..0000000
--- a/metrics/counter/documents.py
+++ /dev/null
@@ -1,426 +0,0 @@
-from scielo_usage_counter.counter import is_request
-
-from metrics.counter.aggregation import (
-    apply_unique_metrics,
-    article_version,
-    counter_data_type,
-    extract_title_pid_generic,
-    increment_document_totals,
-    parent_data_type,
-    should_create_book_item_document,
-)
-from metrics.counter.identifiers import (
-    generate_month_document_id,
-    generate_year_document_id,
-)
-
-
-def convert_to_month_index_documents(data: dict):
-    if not isinstance(data, dict):
-        return {}
-
-    metrics_data = {}
-    unique_state = _initialize_unique_state()
-
-    for value in data.values():
-        _accumulate_documents(
-            data=metrics_data,
-            unique_state=unique_state,
-            value=value,
-            granularity="month",
-        )
-
-    return metrics_data
-
-
-def convert_to_year_index_documents(data: dict):
-    if not isinstance(data, dict):
-        return {}
-
-    metrics_data = {}
-    unique_state = _initialize_unique_state()
-
-    for value in data.values():
-        _accumulate_documents(
-            data=metrics_data,
-            unique_state=unique_state,
-            value=value,
-            granularity="year",
-        )
-
-    return metrics_data
-
-
-def convert_raw_results_to_index_documents(data: dict):
-    return {
-        "month": convert_to_month_index_documents(data),
-        "year": convert_to_year_index_documents(data),
-    }
-
-
-def _initialize_unique_state():
-    return {
-        "item_investigations": set(),
-        "item_requests": set(),
-        "title_investigations": set(),
-        "title_requests": set(),
-    }
-
-
-def _accumulate_documents(data, unique_state, value, granularity):
-    if not isinstance(value, dict):
-        return
-
-    if value.get("collection") == "books":
-        _accumulate_books_documents(data, unique_state, value, granularity)
-        return
-
-    _accumulate_standard_documents(data, unique_state, value, granularity)
-
-
-def _accumulate_standard_documents(data, unique_state, value, granularity):
-    document_id = _generate_document_id(value, granularity)
-    document = data.setdefault(
-        document_id,
-        _build_base_document(value=value, granularity=granularity),
-    )
-
-    increment_document_totals(
-        document=document,
-        click_timestamps=value.get("click_timestamps"),
-        click_timestamps_by_url=value.get("click_timestamps_by_url"),
-        content_type=value.get("content_type"),
-    )
-    apply_unique_metrics(
-        document=document,
-        unique_state=unique_state,
-        scope="item",
-        document_id=document_id,
-        user_session_id=value.get("user_session_id"),
-        is_request_event=is_request(value.get("content_type")),
-    )
-
-
-def _accumulate_books_documents(data, unique_state, value, granularity):
-    if should_create_book_item_document(value):
-        item_document_id = _generate_document_id(
-            value,
-            granularity,
-            metric_scope="item",
-        )
-        item_document = data.setdefault(
-            item_document_id,
-            _build_base_document(
-                value=value,
-                granularity=granularity,
-                metric_scope="item",
-            ),
-        )
-        increment_document_totals(
-            document=item_document,
-            click_timestamps=value.get("click_timestamps"),
-            click_timestamps_by_url=value.get("click_timestamps_by_url"),
-            content_type=value.get("content_type"),
-        )
-        apply_unique_metrics(
-            document=item_document,
-            unique_state=unique_state,
-            scope="item",
-            document_id=item_document_id,
-            user_session_id=value.get("user_session_id"),
-            is_request_event=is_request(value.get("content_type")),
-        )
-
-    title_pid_generic = extract_title_pid_generic(value)
-    if not title_pid_generic:
-        return
-
-    title_document_id = _generate_document_id(
-        value,
-        granularity,
-        metric_scope="title",
-        pid_generic=title_pid_generic,
-    )
-    title_document = data.setdefault(
-        title_document_id,
-        _build_base_document(
-            value=value,
-            granularity=granularity,
-            metric_scope="title",
-            pid_generic=title_pid_generic,
-            document_type="book",
-        ),
-    )
-    increment_document_totals(
-        document=title_document,
-        click_timestamps=value.get("click_timestamps"),
-        click_timestamps_by_url=value.get("click_timestamps_by_url"),
-        content_type=value.get("content_type"),
-    )
-    apply_unique_metrics(
-        document=title_document,
-        unique_state=unique_state,
-        scope="title",
-        document_id=title_document_id,
-        user_session_id=value.get("user_session_id"),
-        is_request_event=is_request(value.get("content_type")),
-    )
-
-
-def _generate_document_id(value, granularity, metric_scope=None, pid_generic=None):
-    pid_generic = pid_generic or value.get("pid_generic")
-    publication_year = str(value.get("publication_year") or "0001")
-    if granularity == "month":
-        access_month = (
-            value.get("access_date", "")[:7] if value.get("access_date") else ""
-        )
-        return generate_month_document_id(
-            collection=value.get("collection"),
-            source_key=value.get("source_key"),
-            pid_v2=value.get("pid_v2"),
-            pid_v3=value.get("pid_v3"),
-            pid_generic=pid_generic,
-            access_month=access_month,
-            counter_access_type=value.get("counter_access_type") or "Open",
-            access_method=value.get("access_method") or "Regular",
-            publication_year=publication_year,
-            metric_scope="title" if metric_scope == "title" else None,
-        )
-
-    return generate_year_document_id(
-        collection=value.get("collection"),
-        source_key=value.get("source_key"),
-        pid_v2=value.get("pid_v2"),
-        pid_v3=value.get("pid_v3"),
-        pid_generic=pid_generic,
-        content_language=value.get("content_language"),
-        access_country_code=value.get("access_country_code"),
-        access_year=value.get("access_year"),
-        counter_access_type=value.get("counter_access_type") or "Open",
-        access_method=value.get("access_method") or "Regular",
-        publication_year=publication_year,
-        metric_scope="title" if metric_scope == "title" else None,
-    )
-
-
-def _build_base_document(
-    value, granularity, metric_scope=None, pid_generic=None, document_type=None
-):
-    collection = value.get("collection")
-    scope = metric_scope or "item"
-    if collection == "books":
-        document_id = pid_generic or value.get("pid_generic")
-        parent_id = extract_title_pid_generic(value, fallback=document_id)
-        if parent_id == document_id or scope == "title":
-            parent_id = None
-        raw_source = value.get("source") or {}
-        source = _build_source(raw_source)
-        base_document = {
-            "collection": collection,
-            "source": source,
-            "document": _build_document(
-                value=value,
-                document_id=document_id,
-                document_type=document_type or value.get("document_type"),
-                parent_id=parent_id,
-                source_identifiers=raw_source.get("identifiers"),
-                metric_scope=scope,
-            ),
-            "counter": _compact_dict(
-                {
-                    "metric_scope": scope,
-                    "data_type": "Book" if scope == "title" else "Book_Segment",
-                    "parent_data_type": "Book" if scope != "title" else None,
-                    "access_type": value.get("counter_access_type") or "Open",
-                    "access_method": value.get("access_method") or "Regular",
-                }
-            ),
-            "total_requests": 0,
-            "total_investigations": 0,
-            "unique_requests": 0,
-            "unique_investigations": 0,
-        }
-        base_document["access"] = _build_access(value, granularity)
-        if granularity == "month":
-            base_document["daily_metrics"] = _build_daily_metrics(value)
-        return base_document
-
-    document_type = value.get("document_type")
-    document_id = value.get("pid_v3") or value.get("pid_v2") or value.get("pid_generic")
-    base_document = {
-        "collection": collection,
-        "source": _build_source(value.get("source")),
-        "document": _build_document(
-            value=value,
-            document_id=document_id,
-            document_type=document_type,
-        ),
-        "counter": _compact_dict(
-            {
-                "metric_scope": "item",
-                "data_type": counter_data_type(document_type),
-                "parent_data_type": parent_data_type(
-                    document_type,
-                    (value.get("source") or {}).get("source_type"),
-                ),
-                "article_version": article_version(document_type),
-                "access_type": value.get("counter_access_type") or "Open",
-                "access_method": value.get("access_method") or "Regular",
-            }
-        ),
-        "total_requests": 0,
-        "total_investigations": 0,
-        "unique_requests": 0,
-        "unique_investigations": 0,
-    }
-    base_document["access"] = _build_access(value, granularity)
-    if granularity == "month":
-        base_document["daily_metrics"] = _build_daily_metrics(value)
-    return base_document
-
-
-def _build_access(value, granularity):
-    if granularity == "month":
-        return {
-            "month": value.get("access_date", "")[:7]
-            if value.get("access_date")
-            else ""
-        }
-
-    return _compact_dict(
-        {
-            "year": value.get("access_year"),
-            "country_code": value.get("access_country_code"),
-            "content_language": value.get("content_language"),
-        }
-    )
-
-
-def _build_daily_metrics(value):
-    day = value.get("access_date", "")[-2:] if value.get("access_date") else "01"
-    return {
-        day: {
-            "total_requests": 0,
-            "total_investigations": 0,
-            "unique_requests": 0,
-            "unique_investigations": 0,
-        }
-    }
-
-
-def _build_document(
-    value,
-    document_id,
-    document_type,
-    parent_id=None,
-    source_identifiers=None,
-    metric_scope="item",
-):
-    document = value.get("document") or {}
-    title = document.get("title")
-    if metric_scope == "title":
-        title = (value.get("source") or {}).get("main_title") or title
-
-    identifiers = _document_identifiers(
-        value=value,
-        document_id=document_id,
-        source_identifiers=source_identifiers,
-        metric_scope=metric_scope,
-    )
-
-    return _compact_dict(
-        {
-            "id": document_id,
-            "type": document_type,
-            "title": title,
-            "parent_id": parent_id,
-            "publication_year": value.get("publication_year"),
-            "identifiers": identifiers,
-        }
-    )
-
-
-def _document_identifiers(
-    value, document_id, source_identifiers=None, metric_scope="item"
-):
-    if value.get("collection") == "books" and metric_scope == "title":
-        identifiers = _book_identifiers_from_pid(document_id)
-        identifiers.update(source_identifiers or {})
-        return _compact_identifiers(identifiers, canonical_id=document_id)
-
-    document_identifiers = (value.get("document") or {}).get("identifiers") or {}
-    identifiers = {
-        "pid_v2": value.get("pid_v2"),
-        "pid_v3": value.get("pid_v3"),
-        "pid_generic": value.get("pid_generic"),
-    }
-    identifiers.update(document_identifiers)
-
-    if value.get("collection") == "books":
-        identifiers.update(_book_identifiers_from_pid(value.get("pid_generic")))
-        identifiers.update(source_identifiers or {})
-
-    return _compact_identifiers(identifiers, canonical_id=document_id)
-
-
-def _book_identifiers_from_pid(pid_generic):
-    value = str(pid_generic or "")
-    if not value.upper().startswith("BOOK:"):
-        return {}
-
-    identifiers = {}
-    parts = value.split("/", 1)
-    book_id = parts[0].split(":", 1)[1] if ":" in parts[0] else ""
-    if book_id:
-        identifiers["book_id"] = book_id
-
-    if len(parts) > 1 and parts[1].upper().startswith("CHAPTER:"):
-        chapter_id = parts[1].split(":", 1)[1] if ":" in parts[1] else ""
-        if chapter_id:
-            identifiers["chapter_id"] = chapter_id
-
-    return identifiers
-
-
-def _build_source(source):
-    source = source or {}
-    source_id = source.get("source_id")
-    source_type = source.get("source_type")
-    identifiers = _compact_identifiers(
-        source.get("identifiers") or {}, canonical_id=source_id
-    )
-
-    return _compact_dict(
-        {
-            "id": source_id,
-            "type": source_type,
-            "title": source.get("main_title"),
-            "scielo_issn": None if source_type == "book" else source.get("scielo_issn"),
-            "acronym": source.get("acronym"),
-            "publisher_name": source.get("publisher_name"),
-            "subject_area_capes": source.get("subject_area_capes"),
-            "subject_area_wos": source.get("subject_area_wos"),
-            "access_type": source.get("access_type"),
-            "city": source.get("city"),
-            "country": source.get("country"),
-            "identifiers": identifiers,
-        }
-    )
-
-
-def _compact_identifiers(identifiers, canonical_id=None):
-    compact = {}
-    canonical_value = str(canonical_id or "").strip().upper()
-    for key, value in (identifiers or {}).items():
-        if value in (None, "", [], {}, ()):
-            continue
-        if canonical_value and str(value).strip().upper() == canonical_value:
-            continue
-        compact[key] = value
-    return compact
-
-
-def _compact_dict(data):
-    return {
-        key: value for key, value in data.items() if value not in (None, "", [], {}, ())
-    }
diff --git a/metrics/counter/identifiers.py b/metrics/counter/identifiers.py
deleted file mode 100644
index bef7b8d..0000000
--- a/metrics/counter/identifiers.py
+++ /dev/null
@@ -1,110 +0,0 @@
-def generate_user_session_id(client_name, client_version, ip_address, datetime, sep="|"):
-    dt_year_month_day = datetime.strftime("%Y-%m-%d")
-    dt_hour = datetime.strftime("%H")
-
-    return sep.join(
-        [
-            str(client_name),
-            str(client_version),
-            str(ip_address),
-            str(dt_year_month_day),
-            str(dt_hour),
-        ]
-    )
-
-
-def generate_item_access_id(
-    col_acron3,
-    source_key,
-    pid_v2,
-    pid_v3,
-    pid_generic,
-    user_session_id,
-    access_country_code,
-    content_language,
-    media_format,
-    content_type,
-    sep="|",
-):
-    return sep.join(
-        [
-            col_acron3,
-            str(source_key or ""),
-            pid_v2 or "",
-            pid_v3 or "",
-            pid_generic or "",
-            str(user_session_id or ""),
-            str(access_country_code or ""),
-            str(content_language or ""),
-            str(media_format or ""),
-            str(content_type or ""),
-        ]
-    )
-
-
-def generate_month_document_id(
-    collection: str,
-    source_key: str,
-    pid_v2: str,
-    pid_v3: str,
-    pid_generic: str,
-    access_month: str,
-    counter_access_type: str,
-    access_method: str,
-    publication_year: str,
-    metric_scope: str = None,
-) -> str:
-    parts = []
-    if metric_scope:
-        parts.append(metric_scope)
-
-    parts.extend(
-        [
-            str(collection or ""),
-            str(source_key or ""),
-            pid_v2 or "",
-            pid_v3 or "",
-            pid_generic or "",
-            str(access_month or ""),
-            str(counter_access_type or ""),
-            str(access_method or ""),
-            str(publication_year or ""),
-        ]
-    )
-    return "|".join(parts)
-
-
-def generate_year_document_id(
-    collection: str,
-    source_key: str,
-    pid_v2: str,
-    pid_v3: str,
-    pid_generic: str,
-    content_language: str,
-    access_country_code: str,
-    access_year: str,
-    counter_access_type: str,
-    access_method: str,
-    publication_year: str,
-    metric_scope: str = None,
-) -> str:
-    parts = []
-    if metric_scope:
-        parts.append(metric_scope)
-
-    parts.extend(
-        [
-            str(collection or ""),
-            str(source_key or ""),
-            pid_v2 or "",
-            pid_v3 or "",
-            pid_generic or "",
-            content_language or "",
-            access_country_code or "",
-            str(access_year or ""),
-            str(counter_access_type or ""),
-            str(access_method or ""),
-            str(publication_year or ""),
-        ]
-    )
-    return "|".join(parts)
diff --git a/metrics/counter/indexing/__init__.py b/metrics/counter/indexing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/counter/indexing/converter.py b/metrics/counter/indexing/converter.py
new file mode 100644
index 0000000..4b4ab1f
--- /dev/null
+++ b/metrics/counter/indexing/converter.py
@@ -0,0 +1,58 @@
+from metrics.counter.indexing.engines.article import ArticlePipeline
+from metrics.counter.indexing.engines.base import DocumentPipeline
+from metrics.counter.indexing.engines.book import BookPipeline
+from metrics.counter.indexing.engines.dataset import DatasetPipeline
+from metrics.counter.indexing.engines.preprint import PreprintPipeline
+
+_PIPELINES = {  # document_type -> pipeline instance, looked up by _get_pipeline
+    "article": ArticlePipeline(),
+    "preprint": PreprintPipeline(),
+    "dataset": DatasetPipeline(),
+    "book": BookPipeline(),
+    "chapter": BookPipeline(),  # a second BookPipeline instance, not shared with "book"
+}
+_DEFAULT = DocumentPipeline()  # fallback when the document type has no entry above
+
+
+def convert(data):
+    """Convert raw results into month and year index documents.
+
+    Each value of ``data`` is accumulated twice by its pipeline, once per
+    granularity, with a separate unique-state per granularity. Returns
+    ``{"month": {...}, "year": {...}}``; both are empty if ``data`` is not a dict.
+    """
+    if not isinstance(data, dict):
+        return {"month": {}, "year": {}}
+
+    documents = {"month": {}, "year": {}}
+    # Same creation order as the documents: month state first, then year.
+    unique_states = {name: _initialize_unique_state() for name in documents}
+
+    for value in data.values():
+        pipeline = _get_pipeline(value)
+        for granularity, accumulated in documents.items():
+            pipeline.accumulate(
+                data=accumulated,
+                unique_state=unique_states[granularity],
+                value=value,
+                granularity=granularity,
+            )
+
+    return documents
+
+
+def _get_pipeline(value):
+    """Return the pipeline for ``value``; non-dict values get the default one."""
+    if not isinstance(value, dict):
+        return _DEFAULT  # its accumulate() ignores non-dict values
+    key = "book" if value.get("collection") == "books" else value.get("document_type")
+    return _PIPELINES.get(key, _DEFAULT)
+
+
+def _initialize_unique_state():
+    """Return fresh, empty seen-key sets for item/title investigations and requests."""
+    return {
+        f"{scope}_{metric}": set()
+        for scope in ("item", "title")
+        for metric in ("investigations", "requests")
+    }
diff --git a/metrics/counter/indexing/engines/__init__.py b/metrics/counter/indexing/engines/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/counter/indexing/engines/article.py b/metrics/counter/indexing/engines/article.py
new file mode 100644
index 0000000..14114dc
--- /dev/null
+++ b/metrics/counter/indexing/engines/article.py
@@ -0,0 +1,11 @@
+from metrics.counter.indexing.engines.base import DocumentPipeline
+
+
+class ArticlePipeline(DocumentPipeline):
+    """Document pipeline whose ``data_type`` is ``Article``."""
+
+    data_type = "Article"
+
+    def _resolve_parent_data_type(self, value):
+        source = value.get("source") or {}
+        return "Journal" if source.get("source_type") == "journal" else None
diff --git a/metrics/counter/indexing/engines/base.py b/metrics/counter/indexing/engines/base.py
new file mode 100644
index 0000000..902cf83
--- /dev/null
+++ b/metrics/counter/indexing/engines/base.py
@@ -0,0 +1,371 @@
+from scielo_usage_counter.counter import get_valid_clicks, is_request
+
+
+class DocumentPipeline:
+    data_type = "Other"
+
+    def accumulate(self, data, unique_state, value, granularity):
+        """Add one record to ``data``; its document is built only on first sight."""
+        if not isinstance(value, dict):
+            return
+
+        document_id = self._generate_document_id(value, granularity)
+        document = data.get(document_id)
+        if document is None:
+            document = self._build_document(value=value, granularity=granularity)
+            data[document_id] = document
+        self._apply_totals(
+            document=document,
+            click_timestamps=value.get("click_timestamps"),
+            click_timestamps_by_url=value.get("click_timestamps_by_url"),
+            content_type=value.get("content_type"),
+        )
+        self._apply_uniques(
+            document=document,
+            unique_state=unique_state,
+            scope="item",
+            document_id=document_id,
+            user_session_id=value.get("user_session_id"),
+            is_request_event=is_request(value.get("content_type")),
+        )
+
+    def _generate_document_id(
+        self, value, granularity, metric_scope=None, pid_generic=None
+    ):
+        """Build the id of the month or year document that ``value`` maps to.
+
+        ``pid_generic`` overrides the record's own ``pid_generic`` when given;
+        only a ``metric_scope`` of ``"title"`` is forwarded to the id builders.
+        """
+        # Fields that both id builders take.
+        shared = {
+            "collection": value.get("collection"),
+            "source_key": value.get("source_key"),
+            "pid_v2": value.get("pid_v2"),
+            "pid_v3": value.get("pid_v3"),
+            "pid_generic": pid_generic or value.get("pid_generic"),
+            "counter_access_type": value.get("counter_access_type") or "Open",
+            "access_method": value.get("access_method") or "Regular",
+            "publication_year": str(value.get("publication_year") or "0001"),
+            "metric_scope": "title" if metric_scope == "title" else None,
+        }
+
+        # Anything other than "month" produces a year id.
+        if granularity != "month":
+            return _generate_year_document_id(
+                content_language=value.get("content_language"),
+                access_country_code=value.get("access_country_code"),
+                access_year=value.get("access_year"),
+                **shared,
+            )
+
+        # Month ids use the first seven characters of access_date.
+        access_date = value.get("access_date")
+        return _generate_month_document_id(
+            access_month=access_date[:7] if access_date else "",
+            **shared,
+        )
+
+    def _build_document(self, value, granularity, **kwargs):
+        """Create a zeroed document for ``value``.
+
+        Extra keyword arguments are accepted but not used by this base class.
+        """
+        skeleton = {
+            "collection": value.get("collection"),
+            "source": self._build_source(value.get("source")),
+            "document": self._build_document_section(
+                value=value,
+                document_id=self._resolve_document_id(value),
+                document_type=value.get("document_type"),
+            ),
+            "counter": self._build_counter_section(value),
+            "total_requests": 0,
+            "total_investigations": 0,
+            "unique_requests": 0,
+            "unique_investigations": 0,
+            "access": self._build_access(value, granularity),
+        }
+        if granularity == "month":
+            skeleton["daily_metrics"] = self._build_daily_metrics(value)
+        return skeleton
+
+    def _resolve_document_id(self, value):  # first truthy of pid_v3, pid_v2; else pid_generic
+        return value.get("pid_v3") or value.get("pid_v2") or value.get("pid_generic")
+
+    def _resolve_parent_data_type(self, value):  # hook: the base pipeline reports no parent
+        return None
+
+    def _build_counter_section(self, value):
+        """Return the ``counter`` section of an item document, empty fields dropped."""
+        section = {
+            "metric_scope": "item",
+            "data_type": self.data_type,
+            "parent_data_type": self._resolve_parent_data_type(value),
+        }
+        section["access_type"] = value.get("counter_access_type") or "Open"
+        section["access_method"] = value.get("access_method") or "Regular"
+        return _strip_empty_values(section)
+
+    def _build_document_section(
+        self,
+        value,
+        document_id,
+        document_type,
+        parent_id=None,
+        source_identifiers=None,
+        metric_scope="item",
+    ):
+        """Assemble the ``document`` section of a document, empty fields dropped."""
+        own_title = (value.get("document") or {}).get("title")
+        # Title-scope documents prefer the source's main title over the record's.
+        if metric_scope == "title":
+            title = (value.get("source") or {}).get("main_title") or own_title
+        else:
+            title = own_title
+
+        section = {
+            "id": document_id,
+            "type": document_type,
+            "title": title,
+            "parent_id": parent_id,
+            "publication_year": value.get("publication_year"),
+            "identifiers": self._document_identifiers(
+                value=value,
+                document_id=document_id,
+                source_identifiers=source_identifiers,
+                metric_scope=metric_scope,
+            ),
+        }
+        return _strip_empty_values(section)
+
+    def _document_identifiers(
+        self, value, document_id, source_identifiers=None, metric_scope="item"
+    ):
+        """Collect the identifiers of the document.
+
+        The record's pids come first, then ``document.identifiers`` overrides
+        them; ``source_identifiers`` and ``metric_scope`` are unused here.
+        """
+        merged = {key: value.get(key) for key in ("pid_v2", "pid_v3", "pid_generic")}
+        merged.update((value.get("document") or {}).get("identifiers") or {})
+        return _strip_empty_identifiers(merged, canonical_id=document_id)
+
+    @staticmethod
+    def _build_source(source):
+        """Return the ``source`` section; book sources never carry ``scielo_issn``."""
+        raw = source or {}
+        kind = raw.get("source_type")
+        section = {
+            "id": raw.get("source_id"),
+            "type": kind,
+            "title": raw.get("main_title"),
+            "scielo_issn": raw.get("scielo_issn") if kind != "book" else None,
+        }
+
+        # Plain pass-through fields, copied in the section's output order.
+        for field in (
+            "acronym",
+            "publisher_name",
+            "subject_area_capes",
+            "subject_area_wos",
+            "access_type",
+            "city",
+            "country",
+        ):
+            section[field] = raw.get(field)
+        section["identifiers"] = _strip_empty_identifiers(
+            raw.get("identifiers") or {}, canonical_id=section["id"]
+        )
+        return _strip_empty_values(section)
+
+    @staticmethod
+    def _build_access(value, granularity):
+        """Return the ``access`` section for a month or year document.
+
+        Month documents keep the first seven characters of ``access_date``;
+        other granularities keep year, country code and content language.
+        """
+        if granularity != "month":
+            yearly = {
+                "year": value.get("access_year"),
+                "country_code": value.get("access_country_code"),
+                "content_language": value.get("content_language"),
+            }
+            return _strip_empty_values(yearly)
+        access_date = value.get("access_date")
+        return {"month": access_date[:7] if access_date else ""}
+
+    @staticmethod
+    def _build_daily_metrics(value):
+        """Return zeroed counters under the record's day key ("01" when undated)."""
+        day = (value.get("access_date") or "01")[-2:]
+        metric_names = (
+            "total_requests",
+            "total_investigations",
+            "unique_requests",
+            "unique_investigations",
+        )
+        return {day: dict.fromkeys(metric_names, 0)}
+
+    @staticmethod
+    def _apply_totals(
+        document, click_timestamps, content_type, click_timestamps_by_url=None
+    ):
+        """Add the record's valid clicks to the document and first-day totals.
+
+        NOTE(review): the day bucket is the first ``daily_metrics`` key, not the
+        record's own day -- confirm each month document only sees one day.
+        """
+        clicks = _count_valid_clicks(
+            click_timestamps=click_timestamps,
+            click_timestamps_by_url=click_timestamps_by_url,
+        )
+        counts_as_request = is_request(content_type)  # evaluated once, reused below
+        daily = document.get("daily_metrics")
+        targets = [document, daily[next(iter(daily))]] if daily else [document]
+        for target in targets:
+            target["total_investigations"] += clicks
+            if counts_as_request:
+                target["total_requests"] += clicks
+
+    @staticmethod
+    def _apply_uniques(
+        document,
+        unique_state,
+        scope,
+        document_id,
+        user_session_id,
+        is_request_event,
+    ):
+        """Count a (document, session) pair at most once per unique metric.
+
+        Records without a session id are skipped. The request bucket is only
+        consulted for request events.
+        """
+        if not user_session_id:
+            return
+
+        pair = (document_id, user_session_id)
+        seen_investigations = unique_state[f"{scope}_investigations"]
+        first_investigation = pair not in seen_investigations
+        if first_investigation:
+            seen_investigations.add(pair)
+
+        first_request = False
+        if is_request_event:
+            seen_requests = unique_state[f"{scope}_requests"]
+            first_request = pair not in seen_requests
+            if first_request:
+                seen_requests.add(pair)
+
+        _increment_document_uniques(document, first_investigation, first_request)
+
+
+def _increment_document_uniques(document, add_investigation=False, add_request=False):
+    """Bump the unique counters of ``document`` and of its first daily bucket.
+
+    An absent or empty ``daily_metrics`` mapping is skipped instead of failing.
+    """
+    daily = document.get("daily_metrics")
+    targets = [document, daily[next(iter(daily))]] if daily else [document]
+    for target in targets:
+        if add_investigation:
+            target["unique_investigations"] += 1
+        if add_request:
+            target["unique_requests"] += 1
+
+
+def _count_valid_clicks(click_timestamps, click_timestamps_by_url=None):
+    """Count valid clicks, per URL when a non-empty per-URL dict is supplied."""
+    per_url = click_timestamps_by_url
+    if not (isinstance(per_url, dict) and per_url):
+        return get_valid_clicks(click_timestamps or {})
+    # Each URL's timestamps are validated separately and the counts added up.
+    return sum(get_valid_clicks(stamps or {}) for stamps in per_url.values())
+
+
+def _strip_empty_identifiers(identifiers, canonical_id=None):
+    """Drop empty identifiers and any that repeat ``canonical_id`` (case-insensitive)."""
+    canonical = str(canonical_id or "").strip().upper()
+
+    def _keep(candidate):
+        if candidate in (None, "", [], {}, ()):
+            return False
+        return not canonical or str(candidate).strip().upper() != canonical
+
+    return {k: v for k, v in (identifiers or {}).items() if _keep(v)}
+
+
+def _strip_empty_values(data):
+    """Return a copy of ``data`` without None, empty-string or empty-container values."""
+    blanks = (None, "", [], {}, ())
+    return {name: item for name, item in data.items() if item not in blanks}
+
+
+def _generate_month_document_id(
+    collection,
+    source_key,
+    pid_v2,
+    pid_v3,
+    pid_generic,
+    access_month,
+    counter_access_type,
+    access_method,
+    publication_year,
+    metric_scope=None,
+):
+    """Join the month document's key fields into one ``|``-separated id.
+
+    A truthy ``metric_scope`` comes first. Every field, pids included, goes
+    through ``str`` so a non-string pid cannot break the join.
+    """
+    fields = (
+        collection,
+        source_key,
+        pid_v2,
+        pid_v3,
+        pid_generic,
+        access_month,
+        counter_access_type,
+        access_method,
+        publication_year,
+    )
+    prefix = [str(metric_scope)] if metric_scope else []
+    return "|".join(prefix + [str(field or "") for field in fields])
+
+
+def _generate_year_document_id(
+    collection,
+    source_key,
+    pid_v2,
+    pid_v3,
+    pid_generic,
+    content_language,
+    access_country_code,
+    access_year,
+    counter_access_type,
+    access_method,
+    publication_year,
+    metric_scope=None,
+):
+    """Join the year document's key fields into one ``|``-separated id.
+
+    A truthy ``metric_scope`` comes first. Every field, pids included, goes
+    through ``str`` so a non-string value cannot break the join.
+    """
+    fields = (
+        collection,
+        source_key,
+        pid_v2,
+        pid_v3,
+        pid_generic,
+        content_language,
+        access_country_code,
+        access_year,
+        counter_access_type,
+        access_method,
+        publication_year,
+    )
+    prefix = [str(metric_scope)] if metric_scope else []
+    return "|".join(prefix + [str(field or "") for field in fields])
diff --git a/metrics/counter/indexing/engines/book.py b/metrics/counter/indexing/engines/book.py
new file mode 100644
index 0000000..0ec3bd2
--- /dev/null
+++ b/metrics/counter/indexing/engines/book.py
@@ -0,0 +1,195 @@
+from scielo_usage_counter.counter import is_request
+
+from metrics.counter.indexing.engines.base import (
+    DocumentPipeline,
+    _strip_empty_identifiers,
+    _strip_empty_values,
+)
+
+
+class BookPipeline(DocumentPipeline):
+    def accumulate(self, data, unique_state, value, granularity):
+        """Fold one book record into its item and/or title documents.
+
+        The item document is updated when the record qualifies for one; the
+        title document whenever a title pid can be derived from the record.
+        """
+        if not isinstance(value, dict):
+            return
+
+        if _should_create_item_document(value):
+            self._accumulate_item(data, unique_state, value, granularity)
+        title_pid = _extract_title_pid_generic(value)
+        if title_pid:
+            self._accumulate_title(data, unique_state, value, granularity, title_pid)
+
+    def _accumulate_item(self, data, unique_state, value, granularity):
+        """Add the record to its item-scope document, creating it on first use.
+
+        The document is built only when its id is new, not on every record.
+        """
+        item_document_id = self._generate_document_id(
+            value, granularity, metric_scope="item"
+        )
+        item_document = data.get(item_document_id)
+        if item_document is None:
+            item_document = data[item_document_id] = self._build_document(
+                value=value, granularity=granularity, metric_scope="item"
+            )
+        content_type = value.get("content_type")
+        self._apply_totals(
+            document=item_document,
+            click_timestamps=value.get("click_timestamps"),
+            click_timestamps_by_url=value.get("click_timestamps_by_url"),
+            content_type=content_type,
+        )
+        self._apply_uniques(
+            document=item_document,
+            unique_state=unique_state,
+            scope="item",
+            document_id=item_document_id,
+            user_session_id=value.get("user_session_id"),
+            is_request_event=is_request(content_type),
+        )
+
+    def _accumulate_title(
+        self, data, unique_state, value, granularity, title_pid_generic
+    ):
+        """Add the record to its title-scope document, creating it on first use.
+
+        The document is built only when its id is new, not on every record.
+        """
+        title_document_id = self._generate_document_id(
+            value, granularity, metric_scope="title", pid_generic=title_pid_generic
+        )
+        title_document = data.get(title_document_id)
+        if title_document is None:
+            title_document = data[title_document_id] = self._build_document(
+                value=value,
+                granularity=granularity,
+                metric_scope="title",
+                pid_generic=title_pid_generic,
+                document_type="book",
+            )
+        self._apply_totals(
+            document=title_document,
+            click_timestamps=value.get("click_timestamps"),
+            click_timestamps_by_url=value.get("click_timestamps_by_url"),
+            content_type=value.get("content_type"),
+        )
+        self._apply_uniques(
+            document=title_document,
+            unique_state=unique_state,
+            scope="title",
+            document_id=title_document_id,
+            user_session_id=value.get("user_session_id"),
+            is_request_event=is_request(value.get("content_type")),
+        )
+
+    def _build_document(self, value, granularity, **kwargs):
+        """Create a zeroed item- or title-scope book document.
+
+        Recognised keyword arguments: ``metric_scope`` (defaults to ``"item"``),
+        ``pid_generic`` (overrides the record's own) and ``document_type``.
+        """
+        scope = kwargs.get("metric_scope") or "item"
+        is_title = scope == "title"
+        document_id = kwargs.get("pid_generic") or value.get("pid_generic")
+        # Only items get a parent, and never one that equals their own id.
+        parent_id = _extract_title_pid_generic(value, fallback=document_id)
+        if is_title or parent_id == document_id:
+            parent_id = None
+        raw_source = value.get("source") or {}
+        counter = {
+            "metric_scope": scope,
+            "data_type": "Book" if is_title else "Book_Segment",
+            "parent_data_type": None if is_title else "Book",
+            "access_type": value.get("counter_access_type") or "Open",
+            "access_method": value.get("access_method") or "Regular",
+        }
+        skeleton = {
+            "collection": value.get("collection"),
+            "source": self._build_source(raw_source),
+            "document": self._build_document_section(
+                value=value,
+                document_id=document_id,
+                document_type=kwargs.get("document_type") or value.get("document_type"),
+                parent_id=parent_id,
+                source_identifiers=raw_source.get("identifiers"),
+                metric_scope=scope,
+            ),
+            "counter": _strip_empty_values(counter),
+            "total_requests": 0,
+            "total_investigations": 0,
+            "unique_requests": 0,
+            "unique_investigations": 0,
+            "access": self._build_access(value, granularity),
+        }
+        if granularity == "month":
+            skeleton["daily_metrics"] = self._build_daily_metrics(value)
+        return skeleton
+
+    def _document_identifiers(
+        self, value, document_id, source_identifiers=None, metric_scope="item"
+    ):
+        """Collect identifiers for a book item or title document.
+
+        Later sources override earlier ones: record pids, ``document.identifiers``,
+        ids parsed from the pid, then ``source_identifiers``. Title documents
+        use only the last two, parsed from ``document_id``.
+        """
+        if metric_scope == "title":
+            merged = _book_identifiers_from_pid(document_id)
+        else:
+            merged = {k: value.get(k) for k in ("pid_v2", "pid_v3", "pid_generic")}
+            merged.update((value.get("document") or {}).get("identifiers") or {})
+            merged.update(_book_identifiers_from_pid(value.get("pid_generic")))
+
+        merged.update(source_identifiers or {})
+        return _strip_empty_identifiers(merged, canonical_id=document_id)
+
+
+def _should_create_item_document(value):
+    """Tell whether the record gets an item-scope document."""
+    if not value.get("pid_generic"):
+        return False
+
+    # A record of type "book" only counts as an item when it is a request.
+    is_book = value.get("document_type") == "book"
+    return not is_book or bool(is_request(value.get("content_type")))
+
+
+def _extract_title_pid_generic(value, fallback=None):
+    """Derive the pid of the book that the record belongs to.
+
+    Order of preference: an explicit ``title_pid_generic``, the upper-cased
+    part of ``pid_generic`` before ``/CHAPTER:``, ``BOOK:<SOURCE_ID>`` built
+    from the source id, and finally ``fallback``.
+    """
+    explicit = value.get("title_pid_generic")
+    if explicit:
+        return explicit
+    upper_pid = (value.get("pid_generic") or "").upper()
+    if "/CHAPTER:" in upper_pid:
+        return upper_pid.partition("/CHAPTER:")[0]
+    source_id = (value.get("source") or {}).get("source_id")
+    return f"BOOK:{str(source_id).upper()}" if source_id else fallback
+
+
+def _book_identifiers_from_pid(pid_generic):
+    """Split a ``BOOK:<id>[/CHAPTER:<id>]`` pid into ``book_id`` and ``chapter_id``.
+
+    Pids not starting with ``BOOK:`` (any case) yield an empty dict; empty ids
+    are left out.
+    """
+    text = str(pid_generic or "")
+    if not text.upper().startswith("BOOK:"):
+        return {}
+    book_part, _, rest = text.partition("/")
+    found = {}
+    book_id = book_part.partition(":")[2]
+    if book_id:
+        found["book_id"] = book_id
+    if rest.upper().startswith("CHAPTER:") and rest.partition(":")[2]:
+        found["chapter_id"] = rest.partition(":")[2]
+    return found
diff --git a/metrics/counter/indexing/engines/dataset.py b/metrics/counter/indexing/engines/dataset.py
new file mode 100644
index 0000000..2ea60d8
--- /dev/null
+++ b/metrics/counter/indexing/engines/dataset.py
@@ -0,0 +1,5 @@
+from metrics.counter.indexing.engines.base import DocumentPipeline
+
+
+class DatasetPipeline(DocumentPipeline):  # base pipeline with only the data type changed
+    data_type = "Dataset"
diff --git a/metrics/counter/indexing/engines/preprint.py b/metrics/counter/indexing/engines/preprint.py
new file mode 100644
index 0000000..5698be2
--- /dev/null
+++ b/metrics/counter/indexing/engines/preprint.py
@@ -0,0 +1,17 @@
+from metrics.counter.indexing.engines.base import DocumentPipeline, _strip_empty_values
+
+
+class PreprintPipeline(DocumentPipeline):
+    """Article-typed pipeline that tags its counter section as a preprint."""
+
+    data_type = "Article"
+
+    def _build_counter_section(self, value):
+        """Return the item counter fields plus ``article_version="Preprint"``."""
+        section = {"metric_scope": "item", "data_type": self.data_type}
+        section["parent_data_type"] = self._resolve_parent_data_type(value)
+        section["article_version"] = "Preprint"
+        section["access_type"] = value.get("counter_access_type") or "Open"
+        section["access_method"] = value.get("access_method") or "Regular"
+
+        return _strip_empty_values(section)
diff --git a/metrics/counter/parser.py b/metrics/counter/parser.py
deleted file mode 100644
index 2081e5d..0000000
--- a/metrics/counter/parser.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import logging
-
-from scielo_usage_counter.translator.classic import URLTranslatorClassicSite
-from scielo_usage_counter.translator.books import URLTranslatorBooksSite
-from scielo_usage_counter.translator.dataverse import URLTranslatorDataverseSite
-from scielo_usage_counter.translator.opac import URLTranslatorOPACSite
-from scielo_usage_counter.translator.opac_alpha import URLTranslatorOPACAlphaSite
-from scielo_usage_counter.translator.preprints import URLTranslatorPreprintsSite
-
-from core.utils.date_utils import get_date_obj
-
-
-def extract_date_from_validation_dict(validation):
-    """
-    Extracts the date from the validation dict of a log file.
-
-    Args:
-        validation (dict): The validation dict of the log file.
-
-    Returns:
-        datetime.date: The extracted date.
-    """
-    try:
-        date_str = validation.get('probably_date')
-        return get_date_obj(date_str, '%Y-%m-%d')
-    except Exception as e:
-        logging.error(f"Failed to extract date from validation: {e}")
-        return None
-
-
-def translator_class_name_to_obj(name: str):
-    """
-    Translates a class name to a class object."
-
-    Parameters:
-        name (str): The name of the URL translator site.
-    """
-    if not name or not isinstance(name, str):
-        return None
-    
-    translator_classes = {
-        'books': URLTranslatorBooksSite,
-        'classic': URLTranslatorClassicSite,
-        'dataverse': URLTranslatorDataverseSite,
-        'opac': URLTranslatorOPACSite,
-        'opac_alpha': URLTranslatorOPACAlphaSite,
-        'preprints': URLTranslatorPreprintsSite
-    }
-    return translator_classes.get(name.lower())
diff --git a/metrics/management/__init__.py b/metrics/management/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/metrics/management/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/metrics/management/commands/__init__.py b/metrics/management/commands/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/metrics/management/commands/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/metrics/management/commands/export_book_r51_monthly_metrics.py b/metrics/management/commands/export_book_r51_monthly_metrics.py
deleted file mode 100644
index 1d78df0..0000000
--- a/metrics/management/commands/export_book_r51_monthly_metrics.py
+++ /dev/null
@@ -1,445 +0,0 @@
-import csv
-import json
-from collections import defaultdict
-from pathlib import Path
-
-from device_detector import DeviceDetector
-from django.core.management.base import BaseCommand, CommandError
-from scielo_usage_counter.translator.books import URLTranslatorBooksSite
-
-from collection.models import Collection
-from document.models import Document
-from metrics.counter import access
-from metrics.counter import documents as index_docs
-from resources.models import MMDB, RobotUserAgent
-from scielo_usage_counter import log_handler, url_translator
-from source.models import Source
-
-
-class Command(BaseCommand):
-    help = (
-        "Generate COUNTER R5.1 monthly book metrics from one or more log files, "
-        "writing item and title CSV outputs."
-    )
-
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "--input",
-            dest="inputs",
-            action="append",
-            required=True,
-            help="Input log file path. Repeat --input for multiple files.",
-        )
-        parser.add_argument(
-            "--item-output",
-            required=True,
-            help="Output CSV path for item-level monthly metrics.",
-        )
-        parser.add_argument(
-            "--title-output",
-            required=True,
-            help="Output CSV path for title-level monthly metrics.",
-        )
-        parser.add_argument(
-            "--summary-output",
-            help="Optional JSON path with parse and totals summary.",
-        )
-        parser.add_argument(
-            "--collection",
-            default="books",
-            help="Collection acronym (default: books).",
-        )
-        parser.add_argument(
-            "--robots-source",
-            choices=sorted(RobotUserAgent.SOURCE_CHOICES),
-            default=RobotUserAgent.SOURCE_ALL,
-            help="Which active robot list to use: all, counter, or scielo.",
-        )
-
-    def handle(self, *args, **options):
-        input_paths = [Path(value).expanduser() for value in options["inputs"]]
-        item_output = Path(options["item_output"]).expanduser()
-        title_output = Path(options["title_output"]).expanduser()
-        summary_output = (
-            Path(options["summary_output"]).expanduser()
-            if options.get("summary_output")
-            else None
-        )
-
-        for path in input_paths:
-            if not path.exists():
-                raise CommandError(f"Input file not found: {path}")
-
-        collection = Collection.objects.filter(acron3=options["collection"]).first()
-        if not collection:
-            raise CommandError(f"Collection not found: {options['collection']}")
-
-        robots_source = options["robots_source"]
-        robots_list = RobotUserAgent.get_patterns(source=robots_source)
-        if not robots_list:
-            raise CommandError(
-                f"No robot user agents found in database for source {robots_source}."
-            )
-
-        mmdb = MMDB.objects.order_by("-created").first()
-        if not mmdb:
-            raise CommandError("No MMDB found in database.")
-
-        parser = log_handler.LogParser(
-            mmdb_data=mmdb.data,
-            robots_list=robots_list,
-            output_mode="dict",
-        )
-        utm = url_translator.URLTranslationManager(
-            documents_metadata=Document.metadata(collection=collection),
-            sources_metadata=Source.metadata(collection=collection),
-            translator=URLTranslatorBooksSite,
-        )
-
-        results = {}
-        parse_summaries = []
-        ua_cache = {}
-
-        for path in input_paths:
-            self.stdout.write(f"Processing {path}...")
-            parse_summaries.append(
-                self._parse_file(
-                    path=path,
-                    parser=parser,
-                    utm=utm,
-                    collection=collection,
-                    ua_cache=ua_cache,
-                    results=results,
-                )
-            )
-
-        monthly_documents = self._build_monthly_documents(results)
-
-        self._write_item_csv(item_output, monthly_documents["item"])
-        self._write_title_csv(title_output, monthly_documents["title"])
-
-        summary = {
-            "robots_source": robots_source,
-            "raw_result_count": len(results),
-            "parse_summaries": parse_summaries,
-            "totals": {
-                "total_item_requests": sum(
-                    doc.get("total_requests", 0) for doc in monthly_documents["item"]
-                ),
-                "total_item_investigations": sum(
-                    doc.get("total_investigations", 0)
-                    for doc in monthly_documents["item"]
-                ),
-                "unique_item_requests": sum(
-                    doc.get("unique_requests", 0) for doc in monthly_documents["item"]
-                ),
-                "unique_item_investigations": sum(
-                    doc.get("unique_investigations", 0)
-                    for doc in monthly_documents["item"]
-                ),
-                "title_total_item_requests": sum(
-                    doc.get("total_requests", 0) for doc in monthly_documents["title"]
-                ),
-                "title_total_item_investigations": sum(
-                    doc.get("total_investigations", 0)
-                    for doc in monthly_documents["title"]
-                ),
-                "unique_title_requests": sum(
-                    doc.get("unique_requests", 0) for doc in monthly_documents["title"]
-                ),
-                "unique_title_investigations": sum(
-                    doc.get("unique_investigations", 0)
-                    for doc in monthly_documents["title"]
-                ),
-            },
-        }
-
-        if summary_output:
-            summary_output.parent.mkdir(parents=True, exist_ok=True)
-            summary_output.write_text(json.dumps(summary, indent=2, sort_keys=True))
-
-        self.stdout.write(self.style.SUCCESS(f"Item CSV written to {item_output}"))
-        self.stdout.write(self.style.SUCCESS(f"Title CSV written to {title_output}"))
-        if summary_output:
-            self.stdout.write(
-                self.style.SUCCESS(f"Summary JSON written to {summary_output}")
-            )
-
-    def _parse_file(self, path, parser, utm, collection, ua_cache, results):
-        stats = defaultdict(int)
-        imported = 0
-
-        with path.open("rb") as fh:
-            for raw_line in fh:
-                stats["lines_parsed"] += 1
-
-                try:
-                    line = raw_line.decode().strip()
-                except UnicodeDecodeError:
-                    line = raw_line.decode("utf-8", errors="ignore").strip()
-
-                match, ip_value = parser.match_with_best_pattern(line)
-                if not match:
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                data = match.groupdict()
-                is_bunny = "unix_ts" in data
-                method = "GET" if is_bunny else data.get("method")
-                status = data.get("status")
-                user_agent = parser.format_user_agent(data.get("user_agent"))
-                url = data.get("path")
-                ip_address = ip_value
-
-                if not parser.has_valid_method(method):
-                    stats["ignored_lines_invalid_method"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                if not parser.has_valid_status(status):
-                    if parser.status_is_redirect(status):
-                        stats["ignored_lines_http_redirects"] += 1
-                    elif parser.status_is_error(status):
-                        stats["ignored_lines_http_errors"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                if parser.user_agent_is_bot(user_agent):
-                    stats["ignored_lines_bot"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                if not parser.has_supported_url(url):
-                    stats["ignored_lines_static_resources"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                if is_bunny:
-                    local_datetime = parser.format_date(data.get("unix_ts"), None)
-                    country_code = data.get(
-                        "country"
-                    ) or parser.geoip.ip_to_country_code(ip_address)
-                else:
-                    local_datetime = parser.format_date(
-                        data.get("date"), data.get("timezone")
-                    )
-                    country_code = parser.geoip.ip_to_country_code(ip_address)
-
-                if not local_datetime:
-                    stats["ignored_lines_invalid_local_datetime"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                if not country_code:
-                    stats["ignored_lines_invalid_country_code"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                device = ua_cache.get(user_agent)
-                if device is None:
-                    try:
-                        device = DeviceDetector(user_agent).parse()
-                    except ZeroDivisionError:
-                        stats["ignored_lines_invalid_user_agent"] += 1
-                        stats["total_ignored_lines"] += 1
-                        ua_cache[user_agent] = False
-                        continue
-                    ua_cache[user_agent] = device
-                elif device is False:
-                    stats["ignored_lines_invalid_user_agent"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                client_name = parser.format_client_name(device)
-                client_version = parser.format_client_version(device)
-
-                if not client_name:
-                    stats["ignored_lines_invalid_client_name"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                if not client_version:
-                    stats["ignored_lines_invalid_client_version"] += 1
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                translated = utm.translate(url)
-                item_access_data = access.extract_item_access_data(
-                    collection.acron3,
-                    translated,
-                )
-                is_valid, _ = access.is_valid_item_access_data(
-                    item_access_data,
-                    utm,
-                    ignore_utm_validation=True,
-                )
-                if not is_valid:
-                    stats["total_ignored_lines"] += 1
-                    continue
-
-                access.update_results_with_item_access_data(
-                    results,
-                    item_access_data,
-                    {
-                        "client_name": client_name,
-                        "client_version": client_version,
-                        "ip_address": ip_address,
-                        "country_code": country_code,
-                        "local_datetime": local_datetime,
-                        "url": url,
-                    },
-                )
-                imported += 1
-                stats["total_imported_lines"] += 1
-
-        return {"path": str(path), "valid_lines_used": imported, **stats}
-
-    def _build_monthly_documents(self, results):
-        documents = index_docs.convert_raw_results_to_index_documents(results)
-        item_documents = {}
-        title_documents = {}
-
-        for doc in documents["month"].values():
-            access = doc.get("access") or {}
-            counter = doc.get("counter") or {}
-            document = doc.get("document") or {}
-            year_month = access.get("month", "")
-            scope = counter.get("metric_scope", "item")
-            if scope == "title":
-                title_id = document.get("id")
-                key = (
-                    year_month,
-                    title_id,
-                    document.get("type"),
-                )
-                if key not in title_documents:
-                    title_documents[key] = {
-                        "year_month": year_month,
-                        "title_pid_generic": title_id,
-                        "document_type": document.get("type"),
-                        "total_requests": 0,
-                        "total_investigations": 0,
-                        "unique_requests": 0,
-                        "unique_investigations": 0,
-                    }
-                title_documents[key]["total_requests"] += doc.get("total_requests", 0)
-                title_documents[key]["total_investigations"] += doc.get(
-                    "total_investigations", 0
-                )
-                title_documents[key]["unique_requests"] += doc.get("unique_requests", 0)
-                title_documents[key]["unique_investigations"] += doc.get(
-                    "unique_investigations", 0
-                )
-                continue
-
-            item_id = document.get("id")
-            title_id = document.get("parent_id") or item_id
-            key = (
-                year_month,
-                title_id,
-                item_id,
-                document.get("type"),
-            )
-            if key not in item_documents:
-                item_documents[key] = {
-                    "year_month": year_month,
-                    "title_pid_generic": title_id,
-                    "segment_pid_generic": item_id,
-                    "document_type": document.get("type"),
-                    "total_requests": 0,
-                    "total_investigations": 0,
-                    "unique_requests": 0,
-                    "unique_investigations": 0,
-                }
-            item_documents[key]["total_requests"] += doc.get("total_requests", 0)
-            item_documents[key]["total_investigations"] += doc.get(
-                "total_investigations", 0
-            )
-            item_documents[key]["unique_requests"] += doc.get("unique_requests", 0)
-            item_documents[key]["unique_investigations"] += doc.get(
-                "unique_investigations", 0
-            )
-
-        return {
-            "item": list(item_documents.values()),
-            "title": list(title_documents.values()),
-        }
-
-    @staticmethod
-    def _write_item_csv(path, item_documents):
-        path.parent.mkdir(parents=True, exist_ok=True)
-        with path.open("w", newline="") as fh:
-            writer = csv.DictWriter(
-                fh,
-                fieldnames=[
-                    "year_month",
-                    "title_pid_generic",
-                    "segment_pid_generic",
-                    "document_type",
-                    "total_item_requests",
-                    "total_item_investigations",
-                    "unique_item_requests",
-                    "unique_item_investigations",
-                ],
-            )
-            writer.writeheader()
-            for doc in sorted(
-                item_documents,
-                key=lambda item: (
-                    item.get("year_month", ""),
-                    item.get("title_pid_generic") or "",
-                    item.get("segment_pid_generic") or "",
-                ),
-            ):
-                writer.writerow(
-                    {
-                        "year_month": doc.get("year_month", ""),
-                        "title_pid_generic": doc.get("title_pid_generic"),
-                        "segment_pid_generic": doc.get("segment_pid_generic"),
-                        "document_type": doc.get("document_type"),
-                        "total_item_requests": doc.get("total_requests", 0),
-                        "total_item_investigations": doc.get("total_investigations", 0),
-                        "unique_item_requests": doc.get("unique_requests", 0),
-                        "unique_item_investigations": doc.get(
-                            "unique_investigations", 0
-                        ),
-                    }
-                )
-
-    @staticmethod
-    def _write_title_csv(path, title_documents):
-        path.parent.mkdir(parents=True, exist_ok=True)
-        with path.open("w", newline="") as fh:
-            writer = csv.DictWriter(
-                fh,
-                fieldnames=[
-                    "year_month",
-                    "title_pid_generic",
-                    "document_type",
-                    "total_item_requests",
-                    "total_item_investigations",
-                    "unique_title_requests",
-                    "unique_title_investigations",
-                ],
-            )
-            writer.writeheader()
-            for doc in sorted(
-                title_documents,
-                key=lambda item: (
-                    item.get("year_month", ""),
-                    item.get("title_pid_generic") or "",
-                ),
-            ):
-                writer.writerow(
-                    {
-                        "year_month": doc.get("year_month", ""),
-                        "title_pid_generic": doc.get("title_pid_generic"),
-                        "document_type": doc.get("document_type"),
-                        "total_item_requests": doc.get("total_requests", 0),
-                        "total_item_investigations": doc.get("total_investigations", 0),
-                        "unique_title_requests": doc.get("unique_requests", 0),
-                        "unique_title_investigations": doc.get(
-                            "unique_investigations", 0
-                        ),
-                    }
-                )
diff --git a/metrics/management/commands/schedule_cleanup_daily_payloads.py b/metrics/management/commands/schedule_cleanup_daily_payloads.py
deleted file mode 100644
index 285a23f..0000000
--- a/metrics/management/commands/schedule_cleanup_daily_payloads.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from django.core.management.base import BaseCommand
-
-from core.utils.scheduler import schedule_task
-from metrics.tasks import task_cleanup_daily_payloads
-
-
-class Command(BaseCommand):
-    help = (
-        "Schedule the periodic cleanup of exported daily metric payload files. "
-        "Runs weekly on Sunday at 03:00 UTC by default, deleting payload files "
-        "for jobs that were exported more than 7 days ago."
-    )
-
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "--day-of-week",
-            default="0",
-            help="Crontab day of week (0=Sunday, 6=Saturday). Default: 0",
-        )
-        parser.add_argument(
-            "--hour",
-            default="3",
-            help="Crontab hour (0-23). Default: 3",
-        )
-        parser.add_argument(
-            "--minute",
-            default="0",
-            help="Crontab minute (0-59). Default: 0",
-        )
-        parser.add_argument(
-            "--older-than-days",
-            type=int,
-            default=7,
-            help="Only delete payloads exported more than N days ago. Default: 7",
-        )
-        parser.add_argument(
-            "--collection",
-            action="append",
-            dest="collections",
-            help="Limit cleanup to a specific collection acronym. Repeat for multiple.",
-        )
-
-    def handle(self, *args, **options):
-        celery_task_name = task_cleanup_daily_payloads.name
-
-        kwargs = {
-            "older_than_days": options["older_than_days"],
-            "collections": options.get("collections") or [],
-        }
-
-        schedule_task(
-            task=celery_task_name,
-            name=celery_task_name,
-            kwargs=kwargs,
-            description="Weekly cleanup of exported daily payload files from disk.",
-            day_of_week=options["day_of_week"],
-            hour=options["hour"],
-            minute=options["minute"],
-        )
-
-        self.stdout.write(
-            self.style.SUCCESS(
-                f"Scheduled periodic task '{celery_task_name}' "
-                f"(day_of_week={options['day_of_week']}, hour={options['hour']}, "
-                f"minute={options['minute']}, older_than_days={kwargs['older_than_days']}, "
-                f"collections={kwargs['collections'] or 'all'})."
-            )
-        )
diff --git a/metrics/migrations/0001_initial.py b/metrics/migrations/0001_initial.py
index 9746d5f..bfae3b5 100644
--- a/metrics/migrations/0001_initial.py
+++ b/metrics/migrations/0001_initial.py
@@ -28,11 +28,15 @@ class Migration(migrations.Migration):
                 ),
                 (
                     "created",
-                    models.DateTimeField(auto_now_add=True, verbose_name="Creation date"),
+                    models.DateTimeField(
+                        auto_now_add=True, verbose_name="Creation date"
+                    ),
                 ),
                 (
                     "updated",
-                    models.DateTimeField(auto_now=True, verbose_name="Last update date"),
+                    models.DateTimeField(
+                        auto_now=True, verbose_name="Last update date"
+                    ),
                 ),
                 (
                     "access_date",
@@ -85,7 +89,9 @@ class Migration(migrations.Migration):
                 ),
                 (
                     "error_message",
-                    models.TextField(blank=True, default="", verbose_name="Error Message"),
+                    models.TextField(
+                        blank=True, default="", verbose_name="Error Message"
+                    ),
                 ),
                 (
                     "export_started_at",
@@ -97,7 +103,9 @@ class Migration(migrations.Migration):
                 ),
                 (
                     "exported_at",
-                    models.DateTimeField(blank=True, null=True, verbose_name="Exported At"),
+                    models.DateTimeField(
+                        blank=True, null=True, verbose_name="Exported At"
+                    ),
                 ),
                 (
                     "collection",
diff --git a/metrics/models.py b/metrics/models.py
index aa789b5..2a7e8b2 100644
--- a/metrics/models.py
+++ b/metrics/models.py
@@ -100,8 +100,13 @@ class Meta:
         verbose_name_plural = _("Daily Metric Jobs")
         unique_together = (("collection", "access_date"),)
         indexes = [
-            models.Index(fields=["collection", "access_date"], name="metrics_daily_coll_date_idx"),
-            models.Index(fields=["status", "export_started_at"], name="metrics_daily_status_exp_idx"),
+            models.Index(
+                fields=["collection", "access_date"], name="metrics_daily_coll_date_idx"
+            ),
+            models.Index(
+                fields=["status", "export_started_at"],
+                name="metrics_daily_status_exp_idx",
+            ),
         ]
 
     def __str__(self):
diff --git a/metrics/opensearch/__init__.py b/metrics/opensearch/__init__.py
index fb9df20..e69de29 100644
--- a/metrics/opensearch/__init__.py
+++ b/metrics/opensearch/__init__.py
@@ -1,8 +0,0 @@
-from .client import OpenSearchUsageClient
-from .mappings import (
-    BOOKS_MONTH_INDEX_MAPPINGS,
-    BOOKS_YEAR_INDEX_MAPPINGS,
-    MONTH_INDEX_MAPPINGS,
-    YEAR_INDEX_MAPPINGS,
-    get_index_mappings,
-)
diff --git a/metrics/opensearch/client.py b/metrics/opensearch/client.py
index ce0de5c..271acee 100644
--- a/metrics/opensearch/client.py
+++ b/metrics/opensearch/client.py
@@ -3,12 +3,9 @@
 from django.conf import settings
 from opensearchpy import NotFoundError, OpenSearch, helpers
 
+from metrics.opensearch.mappings import get_index_mappings
 from metrics.opensearch.names import generate_month_index_name, generate_year_index_name
-
-from .mappings import get_index_mappings
-from .scripts import (
-    IDEMPOTENT_JOB_INCREMENT_SCRIPT,
-    METRIC_FIELDS,
+from metrics.opensearch.painless import (
     build_idempotent_job_increment_action,
     merge_metric_document,
 )
@@ -18,7 +15,13 @@ class OpenSearchUsageClient:
     def __init__(self, url=None, basic_auth=None, api_key=None, verify_certs=None):
         self.client = self.get_opensearch_client(url, basic_auth, api_key, verify_certs)
 
-    def get_opensearch_client(self, url=None, basic_auth=None, api_key=None, verify_certs=None):
+    def get_opensearch_client(
+        self,
+        url=None,
+        basic_auth=None,
+        api_key=None,
+        verify_certs=None,
+    ):
         url = url or getattr(settings, "OPENSEARCH_URL", None)
         basic_auth = basic_auth or getattr(settings, "OPENSEARCH_BASIC_AUTH", None)
         api_key = api_key or getattr(settings, "OPENSEARCH_API_KEY", None)
@@ -26,7 +29,11 @@ def get_opensearch_client(self, url=None, basic_auth=None, api_key=None, verify_
             verify_certs = getattr(settings, "OPENSEARCH_VERIFY_CERTS", False)
 
         if basic_auth:
-            return OpenSearch(url, http_auth=tuple(basic_auth), verify_certs=verify_certs)
+            return OpenSearch(
+                url,
+                http_auth=tuple(basic_auth),
+                verify_certs=verify_certs,
+            )
         if api_key:
             return OpenSearch(url, api_key=api_key, verify_certs=verify_certs)
         return OpenSearch(url, verify_certs=verify_certs)
@@ -56,23 +63,32 @@ def create_index_if_not_exists(self, index_name, mappings, ping_client=False):
             return
 
         if not self.client.indices.exists(index=index_name):
-            self.create_index(index_name=index_name, mappings=mappings, ping_client=False)
+            self.create_index(
+                index_name=index_name,
+                mappings=mappings,
+                ping_client=False,
+            )
 
     def ensure_usage_indexes(self, collection, access_date, index_prefix=None):
-        index_prefix = index_prefix or getattr(settings, "OPENSEARCH_INDEX_NAME", "usage")
+        index_prefix = index_prefix or getattr(
+            settings,
+            "OPENSEARCH_INDEX_NAME",
+            "usage",
+        )
         year_index = generate_year_index_name(index_prefix, collection, access_date)
         month_index = generate_month_index_name(index_prefix, collection, access_date)
 
-        self.create_index_if_not_exists(year_index, get_index_mappings(collection, "year"))
-        self.create_index_if_not_exists(month_index, get_index_mappings(collection, "month"))
+        self.create_index_if_not_exists(
+            year_index,
+            get_index_mappings(collection, "year"),
+        )
+        self.create_index_if_not_exists(
+            month_index,
+            get_index_mappings(collection, "month"),
+        )
 
         return {"year": year_index, "month": month_index}
 
-    def delete_index(self, index_name, ping_client=False):
-        if ping_client and not self.ping():
-            return
-        self.client.indices.delete(index=index_name)
-
     def index_documents(self, index_name, documents, ping_client=False):
         if ping_client and not self.ping():
             return
@@ -207,12 +223,19 @@ def sync_documents(self, index_name, documents, operation="add", ping_client=Fal
         if not documents:
             return
 
-        existing_documents = self.fetch_documents_by_ids(index_name=index_name, doc_ids=list(documents.keys()))
+        existing_documents = self.fetch_documents_by_ids(
+            index_name=index_name,
+            doc_ids=list(documents.keys()),
+        )
         upserts = {}
         deletes = []
 
         for doc_id, document in documents.items():
-            merged = merge_metric_document(existing_documents.get(doc_id), document, operation=operation)
+            merged = merge_metric_document(
+                existing_documents.get(doc_id),
+                document,
+                operation=operation,
+            )
             if merged is None:
                 if doc_id in existing_documents:
                     deletes.append(doc_id)
diff --git a/metrics/opensearch/mappings.py b/metrics/opensearch/mappings.py
index def652f..de4dbae 100644
--- a/metrics/opensearch/mappings.py
+++ b/metrics/opensearch/mappings.py
@@ -1,6 +1,6 @@
-TEXT_KEYWORD_MAPPING = {
+DISPLAY_TEXT_MAPPING = {
     "type": "text",
-    "fields": {"keyword": {"type": "keyword", "ignore_above": 512}},
+    "index": False,
 }
 
 IDENTIFIERS_MAPPING = {"type": "object", "dynamic": True}
@@ -9,7 +9,7 @@
     "properties": {
         "id": {"type": "keyword"},
         "type": {"type": "keyword"},
-        "title": TEXT_KEYWORD_MAPPING,
+        "title": DISPLAY_TEXT_MAPPING,
         "parent_id": {"type": "keyword"},
         "publication_year": {"type": "integer"},
         "identifiers": IDENTIFIERS_MAPPING,
@@ -20,10 +20,10 @@
     "properties": {
         "id": {"type": "keyword"},
         "type": {"type": "keyword"},
-        "title": TEXT_KEYWORD_MAPPING,
+        "title": DISPLAY_TEXT_MAPPING,
         "scielo_issn": {"type": "keyword"},
         "acronym": {"type": "keyword"},
-        "publisher_name": {"type": "keyword"},
+        "publisher_name": DISPLAY_TEXT_MAPPING,
         "access_type": {"type": "keyword"},
         "city": {"type": "keyword"},
         "country": {"type": "keyword"},
@@ -89,14 +89,6 @@ def _build_index_mappings(granularity):
 BOOKS_MONTH_INDEX_MAPPINGS = _build_index_mappings("month")
 
 
-METRIC_FIELDS = (
-    "total_requests",
-    "total_investigations",
-    "unique_requests",
-    "unique_investigations",
-)
-
-
 def get_index_mappings(collection, granularity):
     if granularity not in {"month", "year"}:
         raise ValueError("Granularity must be 'month' or 'year'.")
diff --git a/metrics/opensearch/names.py b/metrics/opensearch/names.py
index 1ecd493..b567d11 100644
--- a/metrics/opensearch/names.py
+++ b/metrics/opensearch/names.py
@@ -1,7 +1,7 @@
-from django.conf import settings
+from config.collections import get_collection_size
 
 
-def _validate_index_inputs(index_prefix: str, collection: str, date: str):
+def _validate_index_inputs(index_prefix, collection, date):
     if not date or not isinstance(date, str):
         raise ValueError("Date must be a non-empty string in 'YYYY-MM-DD' format.")
     if not collection or not isinstance(collection, str):
@@ -10,32 +10,22 @@ def _validate_index_inputs(index_prefix: str, collection: str, date: str):
         raise ValueError("Index prefix must be a non-empty string.")
 
 
-def _get_collection_size(collection: str) -> str:
-    return getattr(settings, "COLLECTION_ACRON3_SIZE_MAP", {}).get(collection, "small")
-
-
-def extract_access_year(date: str) -> str:
+def extract_access_year(date):
     _validate_index_inputs("usage", "tmp", date)
     return date.split("-")[0]
 
 
-def extract_access_month(date: str) -> str:
-    _validate_index_inputs("usage", "tmp", date)
-    year, month, _ = date.split("-")
-    return f"{year}{month}"
-
-
-def generate_month_index_name(index_prefix: str, collection: str, date: str) -> str:
+def generate_month_index_name(index_prefix, collection, date):
     _validate_index_inputs(index_prefix, collection, date)
-    size = _get_collection_size(collection)
+    size = get_collection_size(collection)
     if size in ("xlarge", "large"):
         return f"{index_prefix}_monthly_{collection}_{extract_access_year(date)}"
     return f"{index_prefix}_monthly_{collection}"
 
 
-def generate_year_index_name(index_prefix: str, collection: str, date: str) -> str:
+def generate_year_index_name(index_prefix, collection, date):
     _validate_index_inputs(index_prefix, collection, date)
-    size = _get_collection_size(collection)
+    size = get_collection_size(collection)
     if size in ("xlarge", "large"):
         return f"{index_prefix}_yearly_{collection}_{extract_access_year(date)}"
     return f"{index_prefix}_yearly_{collection}"
diff --git a/metrics/opensearch/scripts.py b/metrics/opensearch/painless.py
similarity index 91%
rename from metrics/opensearch/scripts.py
rename to metrics/opensearch/painless.py
index a6a5e1c..de3de81 100644
--- a/metrics/opensearch/scripts.py
+++ b/metrics/opensearch/painless.py
@@ -14,7 +14,9 @@
   return;
 }
 for (entry in params.document.entrySet()) {
-  if (!params.metric_fields.contains(entry.getKey()) && !'applied_jobs'.equals(entry.getKey()) && !'daily_metrics'.equals(entry.getKey())) {
+  if (!params.metric_fields.contains(entry.getKey())
+      && !'applied_jobs'.equals(entry.getKey())
+      && !'daily_metrics'.equals(entry.getKey())) {
     if (!ctx._source.containsKey(entry.getKey()) || ctx._source[entry.getKey()] != entry.getValue()) {
       ctx._source[entry.getKey()] = entry.getValue();
     }
@@ -36,7 +38,8 @@ def dayMetrics = dayEntry.getValue();
       ctx._source.daily_metrics[day] = new HashMap();
     }
     for (metric in params.metric_fields) {
-      def currentValue = ctx._source.daily_metrics[day].containsKey(metric) ? ctx._source.daily_metrics[day][metric] : 0;
+      def currentValue = ctx._source.daily_metrics[day].containsKey(metric)
+        ? ctx._source.daily_metrics[day][metric] : 0;
       def increment = dayMetrics.containsKey(metric) ? dayMetrics[metric] : 0;
       ctx._source.daily_metrics[day][metric] = currentValue + increment;
     }
@@ -92,7 +95,9 @@ def merge_metric_document(existing, current, operation="add"):
         for day, metrics in current["daily_metrics"].items():
             day_merged = dict(merged_daily.get(day) or {})
             for field in METRIC_FIELDS:
-                day_merged[field] = day_merged.get(field, 0) + signal * metrics.get(field, 0)
+                day_merged[field] = day_merged.get(field, 0) + signal * metrics.get(
+                    field, 0
+                )
             merged_daily[day] = day_merged
         merged["daily_metrics"] = merged_daily
 
diff --git a/metrics/services/__init__.py b/metrics/services/__init__.py
index b305681..e69de29 100644
--- a/metrics/services/__init__.py
+++ b/metrics/services/__init__.py
@@ -1,26 +0,0 @@
-from .jobs import (
-    acquire_daily_metric_job,
-    create_or_update_daily_metric_job,
-    mark_daily_metric_job_exported,
-    mark_daily_metric_job_failed,
-    release_stale_daily_metric_jobs,
-)
-from .resources import (
-    build_search_client,
-    extract_celery_queue_name,
-    fetch_required_resources,
-    get_log_files_for_collection_date,
-)
-from .parser import (
-    is_stale_parsing_log,
-    process_daily_metric_job,
-    process_line,
-    requeue_stale_parsing_log,
-    setup_parsing_environment,
-    touch_parse_heartbeat,
-)
-from .export import (
-    export_daily_metric_payload,
-    export_documents,
-    load_daily_metric_payload,
-)
diff --git a/metrics/services/daily_metric_exports.py b/metrics/services/daily_metric_exports.py
new file mode 100644
index 0000000..8933b3d
--- /dev/null
+++ b/metrics/services/daily_metric_exports.py
@@ -0,0 +1,102 @@
+import logging
+
+from metrics.models import DailyMetricJob
+from metrics.opensearch.client import OpenSearchUsageClient
+from metrics.services.export import (
+    export_daily_metric_payload,
+    load_daily_metric_payload,
+)
+from metrics.services.jobs import (
+    acquire_daily_metric_job,
+    mark_daily_metric_job_exported,
+    mark_daily_metric_job_failed,
+)
+from metrics.services.parsing.job_payloads import build_daily_metric_job_payload
+from metrics.services.resources import fetch_required_resources
+
+
+def build_and_export_daily_metric_job(job_id, track_errors=False, robots_source=None):
+    """Acquire a daily metric job, obtain its payload and export it.
+
+    The job is marked as failed when loading/building or exporting raises,
+    and marked as exported otherwise. Nothing else is done when the job does
+    not exist (an error is logged) or when acquisition returns a falsy value.
+
+    Args:
+        job_id: Identifier passed to ``acquire_daily_metric_job``.
+        track_errors: Forwarded to ``build_daily_metric_job_payload`` when
+            the payload has to be built.
+        robots_source: Forwarded to ``fetch_required_resources`` when the
+            payload has to be built.
+    """
+    try:
+        job = acquire_daily_metric_job(job_id)
+    except DailyMetricJob.DoesNotExist:
+        logging.error("Daily metric job %s does not exist.", job_id)
+        return
+
+    if not job:
+        return
+
+    try:
+        payload = _load_or_build_payload(
+            job=job,
+            track_errors=track_errors,
+            robots_source=robots_source,
+        )
+        _export_payload(job=job, payload=payload)
+    except Exception as exc:
+        # Boundary handler: record the failure on the job instead of letting
+        # it propagate; logging.exception keeps the traceback in the log.
+        logging.exception("Failed to process daily metric job %s: %s", job_id, exc)
+        mark_daily_metric_job_failed(job, exc)
+        return
+
+    mark_daily_metric_job_exported(job)
+
+
+def _load_or_build_payload(job, track_errors, robots_source):
+    """Return the stored payload for ``job`` or build a new one.
+
+    A stored payload is reused only when ``load_daily_metric_payload``
+    returns something and ``job.payload_hash`` is truthy; otherwise the
+    parsing resources are fetched and the payload is rebuilt.
+
+    Raises:
+        RuntimeError: If the robots list or the mmdb resource is falsy.
+    """
+    payload = load_daily_metric_payload(job)
+    if payload is not None and job.payload_hash:
+        return payload
+
+    robots_list, mmdb = fetch_required_resources(robot_source=robots_source)
+    if not robots_list or not mmdb:
+        raise RuntimeError("Required parsing resources are not available.")
+
+    payload = build_daily_metric_job_payload(
+        job=job,
+        robots_list=robots_list,
+        mmdb=mmdb,
+        track_errors=track_errors,
+    )
+    # NOTE(review): presumably the builder persists payload metadata on the
+    # job row, hence the reload before export — confirm.
+    job.refresh_from_db()
+    return payload
+
+
+def _export_payload(job, payload):
+    """Export ``payload`` for ``job`` through a new OpenSearch client.
+
+    Raises:
+        RuntimeError: If the OpenSearch client does not answer the ping.
+    """
+    search_client = OpenSearchUsageClient()
+    if not search_client.ping():
+        raise RuntimeError("OpenSearch client is not available.")
+
+    export_daily_metric_payload(
+        search_client=search_client,
+        job=job,
+        payload=payload,
+    )
diff --git a/metrics/services/daily_payloads.py b/metrics/services/daily_payloads.py
index 8b96f7b..f908f1f 100644
--- a/metrics/services/daily_payloads.py
+++ b/metrics/services/daily_payloads.py
@@ -28,20 +28,13 @@ def resolve_storage_path(storage_path):
     return get_daily_payload_root() / storage_path
 
 
-def serialize_payload(payload):
-    return json.dumps(
-        payload,
-        ensure_ascii=True,
-        sort_keys=True,
-        separators=(",", ":"),
-    )
-
-
 def write_payload(storage_path, payload):
     resolved_path = resolve_storage_path(storage_path)
     resolved_path.parent.mkdir(parents=True, exist_ok=True)
 
-    payload_json = serialize_payload(payload)
+    payload_json = json.dumps(
+        payload, ensure_ascii=True, sort_keys=True, separators=(",", ":")
+    )
     payload_hash = hashlib.sha256(payload_json.encode("utf-8")).hexdigest()
 
     tmp_path = resolved_path.with_suffix(f"{resolved_path.suffix}.tmp")
@@ -56,18 +49,16 @@ def read_payload(storage_path):
     return json.loads(resolved_path.read_text(encoding="utf-8"))
 
 
-def delete_payload(storage_path):
-    resolved_path = resolve_storage_path(storage_path)
-    if resolved_path.exists():
-        resolved_path.unlink()
-
-
 def cleanup_exported_payloads(collections=None, older_than_days=7):
     root = get_daily_payload_root()
     if not root.exists():
         return 0
 
-    cutoff = timezone.now() - timedelta(days=older_than_days) if older_than_days and older_than_days > 0 else None
+    cutoff = (
+        timezone.now() - timedelta(days=older_than_days)
+        if older_than_days and older_than_days > 0
+        else None
+    )
 
     storage_path_to_job = {}
     db_queryset = DailyMetricJob.objects.exclude(storage_path="")
@@ -78,11 +69,13 @@ def cleanup_exported_payloads(collections=None, older_than_days=7):
 
     json_files = root.rglob("*.json")
     if collections:
-        json_files = [p for p in json_files if p.relative_to(root).parts[0] in collections]
+        json_files = [
+            p for p in json_files if p.relative_to(root).parts[0] in collections
+        ]
 
     deleted_count = 0
     for file_path in json_files:
-        if cutoff and _file_is_recent(file_path, cutoff):
+        if cutoff and file_path.stat().st_mtime >= cutoff.timestamp():
             continue
 
         storage_path = file_path.relative_to(root).as_posix()
@@ -113,10 +106,6 @@ def cleanup_exported_payloads(collections=None, older_than_days=7):
     return deleted_count
 
 
-def _file_is_recent(file_path, cutoff):
-    return file_path.stat().st_mtime >= cutoff.timestamp()
-
-
 def _cleanup_empty_dirs(root):
     for dirpath, dirnames, filenames in os.walk(root, topdown=False):
         if dirpath == str(root):
diff --git a/metrics/services/export.py b/metrics/services/export.py
index ef5d9f6..4c3def9 100644
--- a/metrics/services/export.py
+++ b/metrics/services/export.py
@@ -2,10 +2,9 @@
 
 from django.conf import settings
 
-from metrics import opensearch
+from metrics.opensearch.mappings import get_index_mappings
 from metrics.opensearch.names import generate_month_index_name, generate_year_index_name
-
-from . import daily_payloads
+from metrics.services import daily_payloads
 
 
 def load_daily_metric_payload(job):
@@ -71,14 +70,14 @@ def _sync_documents_group(
                 collection=collection,
                 date=f"{access.get('month')}-01",
             )
-            mappings = opensearch.get_index_mappings(collection, "month")
+            mappings = get_index_mappings(collection, "month")
         else:
             index_name = generate_year_index_name(
                 index_prefix=index_prefix,
                 collection=collection,
                 date=f"{access.get('year')}-01-01",
             )
-            mappings = opensearch.get_index_mappings(collection, "year")
+            mappings = get_index_mappings(collection, "year")
 
         grouped_documents.setdefault(
             index_name, {"mappings": mappings, "documents": {}}
diff --git a/metrics/services/jobs.py b/metrics/services/jobs.py
index 78f5100..3456182 100644
--- a/metrics/services/jobs.py
+++ b/metrics/services/jobs.py
@@ -6,7 +6,6 @@
 
 from log_manager import choices
 from log_manager.models import LogFile
-
 from metrics.models import DailyMetricJob
 
 
@@ -74,7 +73,9 @@ def acquire_daily_metric_job(job_id):
             DailyMetricJob.STATUS_EXPORTING,
             DailyMetricJob.STATUS_EXPORTED,
         }:
-            logging.info("Daily metric job %s is already in final/active state.", job_id)
+            logging.info(
+                "Daily metric job %s is already in final/active state.", job_id
+            )
             return None
 
         job.status = DailyMetricJob.STATUS_EXPORTING
@@ -106,7 +107,7 @@ def mark_daily_metric_job_failed(job, error_message):
     )
 
 
-def mark_daily_metric_job_exported(job, user=None):
+def mark_daily_metric_job_exported(job):
     DailyMetricJob.objects.filter(pk=job.pk).update(
         status=DailyMetricJob.STATUS_EXPORTED,
         error_message="",
@@ -120,7 +121,12 @@ def mark_daily_metric_job_exported(job, user=None):
     )
 
 
-def release_stale_daily_metric_jobs(collections=None, from_date=None, until_date=None, stale_after_minutes=60):
+def release_stale_daily_metric_jobs(
+    collections=None,
+    from_date=None,
+    until_date=None,
+    stale_after_minutes=60,
+):
     cutoff = timezone.now() - timedelta(minutes=stale_after_minutes)
     queryset = DailyMetricJob.objects.filter(
         status=DailyMetricJob.STATUS_EXPORTING,
@@ -140,9 +146,7 @@ def release_stale_daily_metric_jobs(collections=None, from_date=None, until_date
         updated=timezone.now(),
     )
     stale_hashes = {
-        log_hash
-        for job in stale_jobs
-        for log_hash in (job.input_log_hashes or [])
+        log_hash for job in stale_jobs for log_hash in (job.input_log_hashes or [])
     }
     if stale_hashes:
         LogFile.objects.filter(hash__in=stale_hashes).update(
diff --git a/metrics/services/log_parsing_jobs.py b/metrics/services/log_parsing_jobs.py
new file mode 100644
index 0000000..de5b20f
--- /dev/null
+++ b/metrics/services/log_parsing_jobs.py
@@ -0,0 +1,395 @@
+from collection.models import Collection
+from config.collections import get_collection_parse_queue
+from core.utils.date_utils import get_date_obj, get_date_range_str
+from log_manager import choices
+from log_manager.models import LogFile
+from metrics.models import DailyMetricJob
+from metrics.services.jobs import create_or_update_daily_metric_job
+
+AUTO_REEXECUTE_POLL_INTERVAL_SECONDS = 30
+
+
+def enqueue_log_parsing_jobs(
+    daily_metric_export_task,
+    wait_log_parsing_wave_task,
+    collections=None,
+    include_logs_with_error=True,
+    batch_size=5000,
+    max_log_files=None,
+    auto_reexecute=False,
+    replace=False,
+    track_errors=False,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    queue_name=None,
+    user_id=None,
+    username=None,
+    skip_log_hashes=None,
+    robots_source=None,
+):
+    from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back)
+    from_date_obj = get_date_obj(from_date)
+    until_date_obj = get_date_obj(until_date)
+    enqueued_logs = 0
+    enqueued_jobs = 0
+    reached_max_log_files = False
+    enqueued_wave_job_ids = []
+    claimed_status_filters = list(_build_log_status_filters(include_logs_with_error))
+    skip_log_hashes = set(skip_log_hashes or [])
+
+    for collection in collections or Collection.acron3_list():
+        collection_obj = Collection.objects.filter(acron3=collection).first()
+        if collection_obj is None:
+            continue
+
+        result = _enqueue_collection_daily_jobs(
+            daily_metric_export_task=daily_metric_export_task,
+            collection=collection_obj,
+            from_date_obj=from_date_obj,
+            until_date_obj=until_date_obj,
+            status_filters=claimed_status_filters,
+            skip_log_hashes=skip_log_hashes,
+            enqueued_logs=enqueued_logs,
+            max_log_files=max_log_files,
+            track_errors=track_errors,
+            user_id=user_id,
+            username=username,
+            robots_source=robots_source,
+            queue_name=queue_name,
+        )
+
+        enqueued_logs += result["enqueued_logs"]
+        enqueued_jobs += result["enqueued_jobs"]
+        enqueued_wave_job_ids.extend(result["enqueued_wave_job_ids"])
+        reached_max_log_files = result["reached_max_log_files"]
+        if result["reached_max_log_files"]:
+            break
+
+    auto_reexecution_enqueued = _schedule_log_parsing_reexecution(
+        wait_log_parsing_wave_task=wait_log_parsing_wave_task,
+        should_reexecute=(
+            auto_reexecute and reached_max_log_files and bool(enqueued_wave_job_ids)
+        ),
+        wave_job_ids=enqueued_wave_job_ids,
+        collections=collections,
+        include_logs_with_error=include_logs_with_error,
+        batch_size=batch_size,
+        max_log_files=max_log_files,
+        auto_reexecute=auto_reexecute,
+        replace=replace,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        skip_log_hashes=sorted(skip_log_hashes),
+        robots_source=robots_source,
+    )
+
+    return {
+        "enqueued_logs": enqueued_logs,
+        "enqueued_jobs": enqueued_jobs,
+        "reached_max_log_files": reached_max_log_files,
+        "auto_reexecution_enqueued": auto_reexecution_enqueued,
+    }
+
+
+def wait_log_parsing_wave(
+    log_parsing_task,
+    wait_log_parsing_wave_task,
+    wave_job_ids=None,
+    collections=None,
+    include_logs_with_error=True,
+    batch_size=5000,
+    max_log_files=None,
+    auto_reexecute=False,
+    replace=False,
+    track_errors=False,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    queue_name=None,
+    user_id=None,
+    username=None,
+    skip_log_hashes=None,
+    poll_interval_seconds=AUTO_REEXECUTE_POLL_INTERVAL_SECONDS,
+    robots_source=None,
+    wave_log_hashes=None,
+):
+    wave_job_ids = wave_job_ids or wave_log_hashes or []
+    if DailyMetricJob.objects.filter(
+        pk__in=wave_job_ids,
+        status__in=[DailyMetricJob.STATUS_PENDING, DailyMetricJob.STATUS_EXPORTING],
+    ).exists():
+        kwargs = _build_log_parsing_reexecution_kwargs(
+            wave_job_ids=wave_job_ids,
+            collections=collections,
+            include_logs_with_error=include_logs_with_error,
+            batch_size=batch_size,
+            max_log_files=max_log_files,
+            auto_reexecute=auto_reexecute,
+            replace=replace,
+            track_errors=track_errors,
+            from_date=from_date,
+            until_date=until_date,
+            days_to_go_back=days_to_go_back,
+            queue_name=queue_name,
+            user_id=user_id,
+            username=username,
+            skip_log_hashes=skip_log_hashes,
+            poll_interval_seconds=poll_interval_seconds,
+            robots_source=robots_source,
+        )
+        apply_kwargs = {
+            "kwargs": kwargs,
+            "countdown": poll_interval_seconds,
+        }
+        if queue_name:
+            apply_kwargs["queue"] = queue_name
+        wait_log_parsing_wave_task.apply_async(**apply_kwargs)
+        return {"wave_completed": False, "reexecution_enqueued": False}
+
+    kwargs = _build_log_parsing_kwargs(
+        collections=collections,
+        include_logs_with_error=include_logs_with_error,
+        batch_size=batch_size,
+        max_log_files=max_log_files,
+        auto_reexecute=auto_reexecute,
+        replace=replace,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        skip_log_hashes=skip_log_hashes,
+        robots_source=robots_source,
+    )
+    apply_kwargs = {"kwargs": kwargs}
+    if queue_name:
+        apply_kwargs["queue"] = queue_name
+    log_parsing_task.apply_async(**apply_kwargs)
+    return {"wave_completed": True, "reexecution_enqueued": True}
+
+
+def _build_log_status_filters(include_logs_with_error):
+    status_filters = [choices.LOG_FILE_STATUS_QUEUED]
+    if include_logs_with_error:
+        status_filters.append(choices.LOG_FILE_STATUS_ERROR)
+    return tuple(status_filters)
+
+
+def _enqueue_collection_daily_jobs(
+    daily_metric_export_task,
+    collection,
+    from_date_obj,
+    until_date_obj,
+    status_filters,
+    skip_log_hashes,
+    enqueued_logs,
+    max_log_files,
+    track_errors,
+    user_id,
+    username,
+    robots_source,
+    queue_name,
+):
+    result = {
+        "enqueued_logs": 0,
+        "enqueued_jobs": 0,
+        "enqueued_wave_job_ids": [],
+        "reached_max_log_files": False,
+    }
+
+    access_dates = LogFile.distinct_access_dates_for_parsing(
+        collection=collection,
+        from_date=from_date_obj,
+        until_date=until_date_obj,
+        status_filters=status_filters,
+        skip_hashes=skip_log_hashes,
+    )
+
+    for access_date in access_dates:
+        log_files = LogFile.for_collection_date(
+            collection=collection,
+            access_date=access_date,
+            status_filters=status_filters,
+        )
+        log_files = [
+            log_file for log_file in log_files if log_file.hash not in skip_log_hashes
+        ]
+
+        reached_limit = False
+        if max_log_files:
+            remaining_log_slots = max_log_files - (
+                enqueued_logs + result["enqueued_logs"]
+            )
+            if remaining_log_slots <= 0:
+                result["reached_max_log_files"] = True
+                break
+            if len(log_files) > remaining_log_slots:
+                log_files = log_files[:remaining_log_slots]
+                reached_limit = True
+        result["reached_max_log_files"] = reached_limit
+
+        if not log_files:
+            continue
+
+        job = create_or_update_daily_metric_job(
+            collection=collection,
+            access_date=access_date,
+            log_files=log_files,
+        )
+        if job.status == DailyMetricJob.STATUS_EXPORTED:
+            if reached_limit:
+                break
+            continue
+
+        daily_metric_export_task.apply_async(
+            args=(job.pk, track_errors, user_id, username, robots_source),
+            queue=queue_name or get_collection_parse_queue(collection.acron3),
+        )
+        result["enqueued_wave_job_ids"].append(job.pk)
+        result["enqueued_jobs"] += 1
+        result["enqueued_logs"] += len(log_files)
+        if max_log_files and enqueued_logs + result["enqueued_logs"] >= max_log_files:
+            result["reached_max_log_files"] = True
+
+        if result["reached_max_log_files"]:
+            break
+
+    return result
+
+
+def _schedule_log_parsing_reexecution(
+    wait_log_parsing_wave_task,
+    should_reexecute,
+    wave_job_ids,
+    collections,
+    include_logs_with_error,
+    batch_size,
+    max_log_files,
+    auto_reexecute,
+    replace,
+    track_errors,
+    from_date,
+    until_date,
+    days_to_go_back,
+    queue_name,
+    user_id,
+    username,
+    skip_log_hashes,
+    robots_source=None,
+):
+    if not should_reexecute:
+        return False
+
+    kwargs = _build_log_parsing_reexecution_kwargs(
+        wave_job_ids=wave_job_ids,
+        collections=collections,
+        include_logs_with_error=include_logs_with_error,
+        batch_size=batch_size,
+        max_log_files=max_log_files,
+        auto_reexecute=auto_reexecute,
+        replace=replace,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        skip_log_hashes=skip_log_hashes,
+        poll_interval_seconds=AUTO_REEXECUTE_POLL_INTERVAL_SECONDS,
+        robots_source=robots_source,
+    )
+
+    apply_kwargs = {"kwargs": kwargs}
+    if queue_name:
+        apply_kwargs["queue"] = queue_name
+    wait_log_parsing_wave_task.apply_async(**apply_kwargs)
+    return True
+
+
+def _build_log_parsing_reexecution_kwargs(
+    wave_job_ids,
+    collections,
+    include_logs_with_error,
+    batch_size,
+    max_log_files,
+    auto_reexecute,
+    replace,
+    track_errors,
+    from_date,
+    until_date,
+    days_to_go_back,
+    queue_name,
+    user_id,
+    username,
+    skip_log_hashes,
+    poll_interval_seconds,
+    robots_source=None,
+):
+    kwargs = {
+        "wave_job_ids": wave_job_ids,
+        "collections": collections,
+        "include_logs_with_error": include_logs_with_error,
+        "batch_size": batch_size,
+        "max_log_files": max_log_files,
+        "auto_reexecute": auto_reexecute,
+        "replace": replace,
+        "track_errors": track_errors,
+        "from_date": from_date,
+        "until_date": until_date,
+        "days_to_go_back": days_to_go_back,
+        "queue_name": queue_name,
+        "user_id": user_id,
+        "username": username,
+        "skip_log_hashes": skip_log_hashes,
+        "poll_interval_seconds": poll_interval_seconds,
+    }
+    if robots_source is not None:
+        kwargs["robots_source"] = robots_source
+    return kwargs
+
+
+def _build_log_parsing_kwargs(
+    collections,
+    include_logs_with_error,
+    batch_size,
+    max_log_files,
+    auto_reexecute,
+    replace,
+    track_errors,
+    from_date,
+    until_date,
+    days_to_go_back,
+    queue_name,
+    user_id,
+    username,
+    skip_log_hashes,
+    robots_source=None,
+):
+    kwargs = {
+        "collections": collections,
+        "include_logs_with_error": include_logs_with_error,
+        "batch_size": batch_size,
+        "max_log_files": max_log_files,
+        "auto_reexecute": auto_reexecute,
+        "replace": replace,
+        "track_errors": track_errors,
+        "from_date": from_date,
+        "until_date": until_date,
+        "days_to_go_back": days_to_go_back,
+        "queue_name": queue_name,
+        "user_id": user_id,
+        "username": username,
+        "skip_log_hashes": skip_log_hashes,
+    }
+    if robots_source is not None:
+        kwargs["robots_source"] = robots_source
+    return kwargs
diff --git a/metrics/services/parser.py b/metrics/services/parser.py
deleted file mode 100644
index 5eb3dbf..0000000
--- a/metrics/services/parser.py
+++ /dev/null
@@ -1,249 +0,0 @@
-import logging
-from datetime import timedelta
-from time import monotonic
-
-from django.conf import settings
-from django.utils import timezone
-
-from scielo_usage_counter import log_handler, url_translator
-
-from log_manager import choices
-from log_manager.models import LogFile
-from log_manager_config.models import CollectionLogDirectory
-from source.models import Source
-from document.models import Document
-from tracker.choices import (
-    LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT,
-    LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE,
-)
-from tracker.models import LogFileDiscardedLine
-
-from metrics.counter import access, documents as index_docs
-from metrics.counter import parser
-
-from .resources import get_log_files_for_collection_date
-from . import daily_payloads
-
-
-def process_daily_metric_job(job, robots_list, mmdb, track_errors=False):
-    log_files = get_log_files_for_collection_date(
-        collection=job.collection,
-        access_date=job.access_date,
-    )
-    if not log_files:
-        raise RuntimeError(f"No log files found for {job.collection.acron3} {job.access_date}.")
-
-    results = {}
-    summary = {
-        "log_files": len(log_files),
-        "input_log_hashes": sorted(log_file.hash for log_file in log_files if log_file.hash),
-        "lines_parsed": 0,
-        "valid_lines": 0,
-        "discarded_lines": 0,
-    }
-
-    LogFile.objects.filter(pk__in=[log_file.pk for log_file in log_files]).update(
-        status=choices.LOG_FILE_STATUS_PARSING,
-        summary={},
-        last_processed_line=0,
-        parse_heartbeat_at=timezone.now(),
-        updated=timezone.now(),
-    )
-    LogFileDiscardedLine.objects.filter(log_file_id__in=[log_file.pk for log_file in log_files]).delete()
-
-    heartbeat_interval_seconds = getattr(settings, "METRICS_PARSE_HEARTBEAT_INTERVAL_SECONDS", 30)
-
-    for log_file in log_files:
-        log_parser, url_translator_manager = setup_parsing_environment(
-            log_file=log_file,
-            robots_list=robots_list,
-            mmdb=mmdb,
-        )
-        line_count = 0
-        valid_count = 0
-        errors = []
-        last_heartbeat_monotonic = monotonic()
-
-        for line in log_parser.parse():
-            line_count += 1
-            if monotonic() - last_heartbeat_monotonic >= heartbeat_interval_seconds:
-                touch_parse_heartbeat(log_file, log_parser.stats.lines_parsed)
-                last_heartbeat_monotonic = monotonic()
-
-            is_valid_line, error_obj = process_line(
-                results=results,
-                line=line,
-                utm=url_translator_manager,
-                log_file=log_file,
-                track_errors=track_errors,
-            )
-            if is_valid_line:
-                valid_count += 1
-            else:
-                summary["discarded_lines"] += 1
-                if error_obj:
-                    errors.append(error_obj)
-
-        if errors:
-            LogFileDiscardedLine.objects.bulk_create(errors)
-
-        summary["lines_parsed"] += line_count
-        summary["valid_lines"] += valid_count
-        log_file.summary = {
-            "parsing_completed": True,
-            "lines_parsed": line_count,
-            "valid_lines": valid_count,
-        }
-        log_file.last_processed_line = log_parser.stats.lines_parsed
-        log_file.parse_heartbeat_at = timezone.now()
-        log_file.save(
-            update_fields=[
-                "summary",
-                "last_processed_line",
-                "parse_heartbeat_at",
-                "updated",
-            ]
-        )
-
-    documents = index_docs.convert_raw_results_to_index_documents(results)
-    storage_path = daily_payloads.build_daily_storage_path(job.collection, job.access_date)
-    payload = {
-        "collection": job.collection.acron3,
-        "access_date": job.access_date.isoformat(),
-        "input_log_hashes": summary["input_log_hashes"],
-        "documents": documents,
-        "summary": summary,
-    }
-    payload_hash = daily_payloads.write_payload(storage_path, payload)
-
-    job.input_log_hashes = summary["input_log_hashes"]
-    job.storage_path = storage_path.as_posix()
-    job.payload_hash = payload_hash
-    job.summary = {
-        **summary,
-        "month_document_count": len(documents.get("month", {})),
-        "year_document_count": len(documents.get("year", {})),
-    }
-    job.save(
-        update_fields=[
-            "input_log_hashes",
-            "storage_path",
-            "payload_hash",
-            "summary",
-            "updated",
-        ]
-    )
-
-    return payload
-
-
-def setup_parsing_environment(log_file, robots_list, mmdb):
-    lp = log_handler.LogParser(mmdb_data=mmdb.data, robots_list=robots_list, output_mode="dict")
-    lp.logfile = log_file.path
-
-    translator_class = None
-    for cld in CollectionLogDirectory.objects.filter(config__collection=log_file.collection):
-        if cld.path in log_file.path:
-            if cld.translator_class:
-                translator_class = parser.translator_class_name_to_obj(cld.translator_class)
-                break
-
-    if not translator_class:
-        raise Exception(f"No URL translator class found for collection {log_file.collection}.")
-
-    utm = url_translator.URLTranslationManager(
-        documents_metadata=Document.metadata(collection=log_file.collection),
-        sources_metadata=Source.metadata(collection=log_file.collection),
-        translator=translator_class,
-    )
-    return lp, utm
-
-
-def process_line(results, line, utm, log_file, track_errors=False):
-    try:
-        translated_url = utm.translate(line.get("url"))
-    except Exception as exc:
-        logging.error("Error translating URL %s: %s", line.get("url"), exc)
-        return False, None
-
-    try:
-        item_access_data = access.extract_item_access_data(log_file.collection.acron3, translated_url)
-    except Exception as exc:
-        logging.error("Error extracting item access data from URL %s: %s", line.get("url"), exc)
-        return False, None
-
-    ignore_utm_validation = not track_errors
-    is_valid, check_result = access.is_valid_item_access_data(
-        item_access_data,
-        utm,
-        ignore_utm_validation,
-    )
-
-    if not is_valid:
-        if track_errors:
-            error_code = check_result.get("code")
-            if error_code in {
-                "invalid_scielo_issn",
-                "invalid_source_id",
-                "invalid_pid_v3",
-                "invalid_pid_v2",
-                "invalid_pid_generic",
-            }:
-                tracker_error_type = (
-                    LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT
-                    if "pid" in error_code
-                    else LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE
-                )
-
-                return False, LogFileDiscardedLine.create(
-                    log_file=log_file,
-                    error_type=tracker_error_type,
-                    message=check_result.get("message"),
-                    data={"line": line, "item_access_data": item_access_data},
-                    save=False,
-                )
-
-        return False, None
-
-    try:
-        access.update_results_with_item_access_data(results, item_access_data, line)
-    except Exception as exc:
-        logging.error("Error updating metrics results for URL %s: %s", line.get("url"), exc)
-        return False, None
-
-    return True, None
-
-
-def touch_parse_heartbeat(log_file, last_processed_line=None):
-    heartbeat_at = timezone.now()
-    update_kwargs = {
-        "parse_heartbeat_at": heartbeat_at,
-        "updated": heartbeat_at,
-    }
-    if last_processed_line is not None:
-        update_kwargs["last_processed_line"] = last_processed_line or 0
-        log_file.last_processed_line = last_processed_line or 0
-    LogFile.objects.filter(pk=log_file.pk).update(**update_kwargs)
-    log_file.parse_heartbeat_at = heartbeat_at
-
-
-def is_stale_parsing_log(log_file, stale_after_minutes=60):
-    if log_file.status != choices.LOG_FILE_STATUS_PARSING:
-        return False
-
-    if not log_file.parse_heartbeat_at:
-        return True
-
-    cutoff = timezone.now() - timedelta(minutes=stale_after_minutes)
-    return log_file.parse_heartbeat_at < cutoff
-
-
-def requeue_stale_parsing_log(log_file):
-    now = timezone.now()
-    LogFile.objects.filter(pk=log_file.pk).update(
-        status=choices.LOG_FILE_STATUS_ERROR,
-        parse_heartbeat_at=None,
-        updated=now,
-    )
-    log_file.status = choices.LOG_FILE_STATUS_ERROR
-    log_file.parse_heartbeat_at = None
diff --git a/metrics/services/parsing/__init__.py b/metrics/services/parsing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/services/parsing/environment.py b/metrics/services/parsing/environment.py
new file mode 100644
index 0000000..dba5567
--- /dev/null
+++ b/metrics/services/parsing/environment.py
@@ -0,0 +1,58 @@
+from scielo_usage_counter.translator.books import URLTranslatorBooksSite
+from scielo_usage_counter.translator.classic import URLTranslatorClassicSite
+from scielo_usage_counter.translator.dataverse import URLTranslatorDataverseSite
+from scielo_usage_counter.translator.opac import URLTranslatorOPACSite
+from scielo_usage_counter.translator.opac_alpha import URLTranslatorOPACAlphaSite
+from scielo_usage_counter.translator.preprints import URLTranslatorPreprintsSite
+
+from document.models import Document
+from log_manager_config.models import CollectionLogDirectory
+from scielo_usage_counter import log_handler, url_translator
+from source.models import Source
+
+
+def setup_parsing_environment(log_file, robots_list, mmdb):
+    log_parser = log_handler.LogParser(
+        mmdb_data=mmdb.data,
+        robots_list=robots_list,
+        output_mode="dict",
+    )
+    log_parser.logfile = log_file.path
+
+    translator_class = _get_log_file_translator_class(log_file)
+    if not translator_class:
+        raise Exception(
+            f"No URL translator class found for collection {log_file.collection}."
+        )
+
+    url_translator_manager = url_translator.URLTranslationManager(
+        documents_metadata=Document.metadata(collection=log_file.collection),
+        sources_metadata=Source.metadata(collection=log_file.collection),
+        translator=translator_class,
+    )
+    return log_parser, url_translator_manager
+
+
+def _get_log_file_translator_class(log_file):
+    for directory in CollectionLogDirectory.objects.filter(
+        config__collection=log_file.collection,
+    ):
+        if directory.path in log_file.path and directory.translator_class:
+            return _get_translator_class(directory.translator_class)
+
+    return None
+
+
+def _get_translator_class(name):
+    if not name or not isinstance(name, str):
+        return None
+
+    translator_classes = {
+        "books": URLTranslatorBooksSite,
+        "classic": URLTranslatorClassicSite,
+        "dataverse": URLTranslatorDataverseSite,
+        "opac": URLTranslatorOPACSite,
+        "opac_alpha": URLTranslatorOPACAlphaSite,
+        "preprints": URLTranslatorPreprintsSite,
+    }
+    return translator_classes.get(name.lower())
diff --git a/metrics/services/parsing/job_payloads.py b/metrics/services/parsing/job_payloads.py
new file mode 100644
index 0000000..fa30b3b
--- /dev/null
+++ b/metrics/services/parsing/job_payloads.py
@@ -0,0 +1,158 @@
+from time import monotonic
+
+from django.conf import settings
+
+from log_manager.models import LogFile
+from metrics.counter.indexing import converter as index_docs
+from metrics.services import daily_payloads
+from metrics.services.parsing.environment import setup_parsing_environment
+from metrics.services.parsing.lines import process_line
+from metrics.services.parsing.log_files import (
+    clear_discarded_lines,
+    mark_log_file_completed,
+    mark_logs_as_parsing,
+    touch_parse_heartbeat,
+)
+from tracker.models import LogFileDiscardedLine
+
+
+def build_daily_metric_job_payload(job, robots_list, mmdb, track_errors=False):
+    input_log_hashes = sorted(job.input_log_hashes or [])
+    log_files = _get_job_log_files(job, input_log_hashes)
+    results = {}
+    summary = _initial_summary(log_files, input_log_hashes)
+
+    mark_logs_as_parsing(log_files)
+    clear_discarded_lines(log_files)
+
+    for log_file in log_files:
+        log_summary = _parse_log_file_into_results(
+            log_file=log_file,
+            results=results,
+            robots_list=robots_list,
+            mmdb=mmdb,
+            track_errors=track_errors,
+        )
+        _merge_log_summary(summary, log_summary)
+
+    documents = index_docs.convert(results)
+    payload = _write_job_payload(job, documents, summary)
+    return payload
+
+
+def _get_job_log_files(job, input_log_hashes):
+    if not input_log_hashes:
+        raise RuntimeError(f"Daily metric job {job.pk} has no input log hashes.")
+
+    log_files = LogFile.for_collection_date_hashes(
+        collection=job.collection,
+        access_date=job.access_date,
+        log_hashes=input_log_hashes,
+    )
+    found_hashes = {log_file.hash for log_file in log_files if log_file.hash}
+    missing_hashes = sorted(set(input_log_hashes) - found_hashes)
+    if missing_hashes:
+        raise RuntimeError(
+            f"Daily metric job {job.pk} is missing log files for "
+            f"{job.collection.acron3} {job.access_date}: "
+            f"{', '.join(missing_hashes)}."
+        )
+    return log_files
+
+
+def _initial_summary(log_files, input_log_hashes):
+    return {
+        "log_files": len(log_files),
+        "input_log_hashes": input_log_hashes,
+        "lines_parsed": 0,
+        "valid_lines": 0,
+        "discarded_lines": 0,
+    }
+
+
+def _parse_log_file_into_results(
+    log_file, results, robots_list, mmdb, track_errors=False
+):
+    log_parser, url_translator_manager = setup_parsing_environment(
+        log_file=log_file,
+        robots_list=robots_list,
+        mmdb=mmdb,
+    )
+    heartbeat_interval_seconds = getattr(
+        settings,
+        "METRICS_PARSE_HEARTBEAT_INTERVAL_SECONDS",
+        30,
+    )
+    summary = {
+        "lines_parsed": 0,
+        "valid_lines": 0,
+        "discarded_lines": 0,
+    }
+    errors = []
+    last_heartbeat_monotonic = monotonic()
+
+    for line in log_parser.parse():
+        summary["lines_parsed"] += 1
+        if monotonic() - last_heartbeat_monotonic >= heartbeat_interval_seconds:
+            touch_parse_heartbeat(log_file, log_parser.stats.lines_parsed)
+            last_heartbeat_monotonic = monotonic()
+
+        is_valid_line, error_obj = process_line(
+            results=results,
+            line=line,
+            utm=url_translator_manager,
+            log_file=log_file,
+            track_errors=track_errors,
+        )
+        if is_valid_line:
+            summary["valid_lines"] += 1
+        else:
+            summary["discarded_lines"] += 1
+            if error_obj:
+                errors.append(error_obj)
+
+    if errors:
+        LogFileDiscardedLine.objects.bulk_create(errors)
+
+    mark_log_file_completed(log_file, log_parser, summary)
+    return summary
+
+
+def _merge_log_summary(summary, log_summary):
+    summary["lines_parsed"] += log_summary["lines_parsed"]
+    summary["valid_lines"] += log_summary["valid_lines"]
+    summary["discarded_lines"] += log_summary["discarded_lines"]
+
+
+def _write_job_payload(job, documents, summary):
+    storage_path = daily_payloads.build_daily_storage_path(
+        job.collection,
+        job.access_date,
+    )
+    payload = {
+        "collection": job.collection.acron3,
+        "access_date": job.access_date.isoformat(),
+        "input_log_hashes": summary["input_log_hashes"],
+        "documents": documents,
+        "summary": summary,
+    }
+    payload_hash = daily_payloads.write_payload(storage_path, payload)
+
+    job.input_log_hashes = summary["input_log_hashes"]
+    job.storage_path = storage_path.as_posix()
+    job.payload_hash = payload_hash
+    job.summary = {
+        **summary,
+        "month_document_count": len(documents.get("month", {})),
+        "year_document_count": len(documents.get("year", {})),
+    }
+    job.save(
+        update_fields=[
+            "input_log_hashes",
+            "storage_path",
+            "payload_hash",
+            "summary",
+            "updated",
+        ]
+    )
+    return payload
diff --git a/metrics/services/parsing/lines.py b/metrics/services/parsing/lines.py
new file mode 100644
index 0000000..f7a7f04
--- /dev/null
+++ b/metrics/services/parsing/lines.py
@@ -0,0 +1,96 @@
+import logging
+
+from metrics.counter.access import accumulation, extraction, validation
+from tracker.choices import (
+    LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT,
+    LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE,
+)
+from tracker.models import LogFileDiscardedLine
+
+# Validation failure codes persisted as LogFileDiscardedLine rows when error
+# tracking is enabled; failures with any other code are dropped unrecorded.
+TRACKED_VALIDATION_ERROR_CODES = {
+    # Reported as a missing source / as a missing document, respectively.
+    "invalid_scielo_issn", "invalid_source_id",
+    "invalid_pid_v3", "invalid_pid_v2", "invalid_pid_generic",
+}
+
+
+def process_line(results, line, utm, log_file, track_errors=False):
+    """Translate, validate and accumulate a single parsed log line.
+
+    Exceptions raised while translating, extracting or accumulating are logged
+    and reported as an invalid line instead of being propagated.
+
+    Args:
+        results: Accumulator updated by ``accumulation.accumulate``.
+        line: Parsed log line; its ``"url"`` entry is the one translated.
+        utm: URL translator manager, also handed to the validator.
+        log_file: Log file the line belongs to; supplies the collection acronym.
+        track_errors: Enforce UTM validation and build discarded-line records.
+
+    Returns:
+        ``(True, None)`` when the line was accumulated, otherwise
+        ``(False, error)`` where ``error`` is an unsaved record or ``None``.
+    """
+    failure = (False, None)
+    try:
+        target = utm.translate(line.get("url"))
+    except Exception as exc:
+        logging.error("Error translating URL %s: %s", line.get("url"), exc)
+        return failure
+
+    try:
+        access = extraction.extract(log_file.collection.acron3, target)
+    except Exception as exc:
+        message = "Error extracting COUNTER access from URL %s: %s"
+        logging.error(message, line.get("url"), exc)
+        return failure
+
+    # The third argument asks the validator to ignore UTM validation.
+    is_valid, check_result = validation.is_valid(access, utm, not track_errors)
+    if not is_valid:
+        return _build_discarded_line_error(
+            track_errors=track_errors,
+            check_result=check_result,
+            log_file=log_file,
+            line=line,
+            counter_access=access,
+        )
+
+    try:
+        accumulation.accumulate(results, access, line)
+    except Exception as exc:
+        message = "Error updating metrics results for URL %s: %s"
+        logging.error(message, line.get("url"), exc)
+        return failure
+    return True, None
+
+
+def _build_discarded_line_error(
+    track_errors, check_result, log_file, line, counter_access
+):
+    """Build the ``(is_valid, error)`` result for a line that failed validation.
+
+    The first element is always ``False``. The second is an unsaved
+    ``LogFileDiscardedLine`` only when ``track_errors`` is set and the failure
+    code is in ``TRACKED_VALIDATION_ERROR_CODES``; otherwise it is ``None``.
+    """
+    if not track_errors:
+        return False, None
+    code = check_result.get("code")
+    if code not in TRACKED_VALIDATION_ERROR_CODES:
+        return False, None
+    # Codes mentioning "pid" point at the document; the others at the source.
+    if "pid" in code:
+        reason = LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT
+    else:
+        reason = LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE
+    discarded = LogFileDiscardedLine.create(
+        log_file=log_file,
+        error_type=reason,
+        message=check_result.get("message"),
+        data={"line": line, "item_access_data": counter_access},
+        save=False,
+    )
+    return False, discarded
diff --git a/metrics/services/parsing/log_files.py b/metrics/services/parsing/log_files.py
new file mode 100644
index 0000000..5bfbfa9
--- /dev/null
+++ b/metrics/services/parsing/log_files.py
@@ -0,0 +1,78 @@
+from datetime import timedelta
+
+from django.utils import timezone
+
+from log_manager import choices
+from log_manager.models import LogFile
+from tracker.models import LogFileDiscardedLine
+
+
+def mark_logs_as_parsing(log_files):
+    """Bulk-set ``log_files`` to PARSING with a clean summary and heartbeat."""
+    now = timezone.now()
+    reset = dict(last_processed_line=0, parse_heartbeat_at=now, updated=now)
+    primary_keys = [entry.pk for entry in log_files]
+    # Queryset update: no model save() runs, so ``updated`` is set explicitly.
+    LogFile.objects.filter(pk__in=primary_keys).update(
+        status=choices.LOG_FILE_STATUS_PARSING, summary={}, **reset
+    )
+
+
+def clear_discarded_lines(log_files):
+    """Delete every discarded-line record attached to ``log_files``."""
+    owner_ids = [entry.pk for entry in log_files]
+    LogFileDiscardedLine.objects.filter(log_file_id__in=owner_ids).delete()
+
+
+def mark_log_file_completed(log_file, log_parser, summary):
+    """Save the final parse counters and a fresh heartbeat on ``log_file``.
+
+    Only ``lines_parsed`` and ``valid_lines`` are copied from ``summary``; the
+    file's status is left untouched.
+    """
+    final_summary = {"parsing_completed": True}
+    for counter in ("lines_parsed", "valid_lines"):
+        final_summary[counter] = summary[counter]
+    log_file.summary = final_summary
+    log_file.last_processed_line = log_parser.stats.lines_parsed
+    log_file.parse_heartbeat_at = timezone.now()
+
+    # "updated" is listed so that column is written by this partial save too.
+    changed = ["summary", "last_processed_line", "parse_heartbeat_at", "updated"]
+    log_file.save(update_fields=changed)
+
+
+def touch_parse_heartbeat(log_file, last_processed_line=None):
+    """Refresh the parse heartbeat of ``log_file`` and, if given, its progress.
+
+    Written with a queryset update; heartbeat/progress are mirrored on the instance.
+    """
+    beat = timezone.now()
+    changes = dict(parse_heartbeat_at=beat, updated=beat)
+    if last_processed_line is not None:
+        progress = last_processed_line or 0
+        changes["last_processed_line"] = log_file.last_processed_line = progress
+    LogFile.objects.filter(pk=log_file.pk).update(**changes)
+    log_file.parse_heartbeat_at = beat
+
+
+def is_stale_parsing_log(log_file, stale_after_minutes=60):
+    """Tell whether a PARSING log has no heartbeat or one older than the limit."""
+    if log_file.status != choices.LOG_FILE_STATUS_PARSING:
+        return False
+    last_beat = log_file.parse_heartbeat_at
+    if not last_beat:
+        # Never reported a heartbeat: treated as stale.
+        return True
+    return timezone.now() - last_beat > timedelta(minutes=stale_after_minutes)
+
+
+def requeue_stale_parsing_log(log_file):
+    """Set ``log_file`` to ERROR with no heartbeat, in the database and in memory."""
+    stamp = timezone.now()
+    retry_state = dict(status=choices.LOG_FILE_STATUS_ERROR, parse_heartbeat_at=None)
+    LogFile.objects.filter(pk=log_file.pk).update(updated=stamp, **retry_state)
+
+    # Mirror the persisted values on the instance the caller holds.
+    for field_name, value in retry_state.items():
+        setattr(log_file, field_name, value)
diff --git a/metrics/services/resources.py b/metrics/services/resources.py
index dc31400..9ac4b99 100644
--- a/metrics/services/resources.py
+++ b/metrics/services/resources.py
@@ -1,16 +1,7 @@
 import logging
 
-from django.conf import settings
-
-from log_manager.models import LogFile
 from resources.models import MMDB, RobotUserAgent
 
-from metrics import opensearch
-
-
-def extract_celery_queue_name(collection_acronym):
-    return f"parse_{settings.COLLECTION_ACRON3_SIZE_MAP.get(collection_acronym, 'small')}"
-
 
 def fetch_required_resources(robot_source=None):
     robots_list = RobotUserAgent.get_patterns(source=robot_source)
@@ -28,27 +19,3 @@ def fetch_required_resources(robot_source=None):
         return None, None
 
     return robots_list, mmdb
-
-
-def build_search_client():
-    return opensearch.OpenSearchUsageClient(
-        settings.OPENSEARCH_URL,
-        settings.OPENSEARCH_BASIC_AUTH,
-        settings.OPENSEARCH_API_KEY,
-        settings.OPENSEARCH_VERIFY_CERTS,
-    )
-
-
-def get_log_files_for_collection_date(collection, access_date, status_filters=None):
-    queryset = (
-        LogFile.objects.filter(
-            collection=collection,
-            date=access_date,
-        )
-        .select_related("collection")
-        .order_by("path", "hash")
-    )
-    if status_filters:
-        queryset = queryset.filter(status__in=status_filters)
-
-    return list(queryset)
diff --git a/metrics/services/resume.py b/metrics/services/resume.py
new file mode 100644
index 0000000..48253a4
--- /dev/null
+++ b/metrics/services/resume.py
@@ -0,0 +1,258 @@
+import logging
+
+from django.utils import timezone
+
+from config.collections import get_collection_parse_queue
+from core.utils.date_utils import get_date_obj, get_date_range_str
+from log_manager import choices
+from log_manager.models import LogFile
+from metrics.models import DailyMetricJob
+from metrics.services.jobs import (
+    create_or_update_daily_metric_job,
+    release_stale_daily_metric_jobs,
+)
+from metrics.services.parsing.log_files import (
+    is_stale_parsing_log,
+    requeue_stale_parsing_log,
+)
+
+
+def resume_daily_metric_jobs(
+    daily_metric_export_task,
+    collections=None,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    stale_after_minutes=60,
+    queue_name=None,
+    user_id=None,
+    username=None,
+    robots_source=None,
+):
+    """Release stale daily metric jobs, then enqueue every resumable job.
+
+    Returns the number of enqueued jobs (``resumed_jobs``, ``resumed_logs``) and
+    of released stale jobs (``released_stale_jobs``, ``released_stale_batches``).
+    """
+    from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back)
+    first_day, last_day = get_date_obj(from_date), get_date_obj(until_date)
+
+    released = release_stale_daily_metric_jobs(
+        collections=collections,
+        from_date=first_day,
+        until_date=last_day,
+        stale_after_minutes=stale_after_minutes,
+    )
+    resumed = _enqueue_resumable_daily_metric_jobs(
+        daily_metric_export_task=daily_metric_export_task,
+        collections=collections,
+        from_date_obj=first_day,
+        until_date_obj=last_day,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        robots_source=robots_source,
+    )
+    logging.info(
+        "Resumed daily metric jobs for %s day(s); released %s stale job(s) at %s.",
+        resumed,
+        released,
+        timezone.now(),
+    )
+    outcome = {"resumed_logs": resumed, "resumed_jobs": resumed}
+    outcome.update(released_stale_batches=released, released_stale_jobs=released)
+    return outcome
+
+
+def resume_stale_parsing_logs(
+    log_parsing_task,
+    collections=None,
+    batch_size=5000,
+    track_errors=False,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    stale_after_minutes=60,
+    max_log_files=None,
+    queue_name=None,
+    user_id=None,
+    username=None,
+    robots_source=None,
+):
+    """Mark stale PARSING logs for retry and enqueue one parsing run for the window.
+
+    Returns a dict with ``stale_logs_marked_for_retry`` (count of requeued logs)
+    and ``parse_logs_enqueued`` (always ``True``).
+    """
+    from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back)
+    marked = _requeue_matching_stale_logs(
+        collections=collections,
+        from_date_obj=get_date_obj(from_date),
+        until_date_obj=get_date_obj(until_date),
+        stale_after_minutes=stale_after_minutes,
+        max_log_files=max_log_files,
+    )
+    # The retry task is enqueued even when no log was marked stale.
+    _enqueue_log_parsing_retry(
+        log_parsing_task=log_parsing_task,
+        collections=collections,
+        batch_size=batch_size,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        max_log_files=max_log_files,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        robots_source=robots_source,
+    )
+    return {"stale_logs_marked_for_retry": marked, "parse_logs_enqueued": True}
+
+
+def _enqueue_resumable_daily_metric_jobs(
+    daily_metric_export_task,
+    collections,
+    from_date_obj,
+    until_date_obj,
+    queue_name,
+    user_id,
+    username,
+    robots_source,
+):
+    """Enqueue the export task for each resumable job; return how many were sent."""
+    candidates = _get_resumable_daily_metric_jobs(
+        collections, from_date_obj, until_date_obj
+    )
+    enqueued = 0
+    for candidate in candidates:
+        job = _refresh_job_inputs_from_retryable_logs(candidate)
+        if job is not None and job.status != DailyMetricJob.STATUS_EXPORTED:
+            # False presumably maps to the export task's ``track_errors`` argument.
+            task_args = (job.pk, False, user_id, username, robots_source)
+            queue = queue_name or get_collection_parse_queue(job.collection.acron3)
+            daily_metric_export_task.apply_async(args=task_args, queue=queue)
+            enqueued += 1
+    return enqueued
+
+
+def _get_resumable_daily_metric_jobs(collections, from_date_obj, until_date_obj):
+    """Return PENDING/ERROR jobs in the date window, ordered by day and collection.
+
+    A non-empty ``collections`` restricts the result to those acronyms.
+    """
+    resumable = [DailyMetricJob.STATUS_PENDING, DailyMetricJob.STATUS_ERROR]
+    criteria = dict(access_date__gte=from_date_obj, access_date__lte=until_date_obj)
+    if collections:
+        criteria["collection__acron3__in"] = collections
+    jobs = DailyMetricJob.objects.filter(status__in=resumable, **criteria)
+    return jobs.select_related("collection").order_by(
+        "access_date", "collection__acron3"
+    )
+
+
+def _refresh_job_inputs_from_retryable_logs(job):
+    """Pick the job to resume for ``job``'s collection and access date.
+
+    Returns the job refreshed from QUEUED/ERROR logs when any exist, ``job``
+    itself when it already has a stored payload, otherwise ``None`` (after
+    logging a warning).
+    """
+    day_key = {"collection": job.collection, "access_date": job.access_date}
+    retryable = [choices.LOG_FILE_STATUS_QUEUED, choices.LOG_FILE_STATUS_ERROR]
+    pending_logs = LogFile.for_collection_date(status_filters=retryable, **day_key)
+    if pending_logs:
+        # Retryable logs take precedence: refresh the job inputs from them.
+        return create_or_update_daily_metric_job(log_files=pending_logs, **day_key)
+
+    # No logs left to retry: only a job with a stored payload can be resumed.
+    has_payload = job.storage_path and job.payload_hash
+    if not has_payload:
+        logging.warning(
+            "Skipping daily metric job %s: no queued/error logs or stored payload.",
+            job.pk,
+        )
+        return None
+
+    return job
+
+
+def _requeue_matching_stale_logs(
+    collections, from_date_obj, until_date_obj, stale_after_minutes, max_log_files
+):
+    """Requeue stale PARSING logs dated inside the window; return how many.
+
+    Stops early once ``max_log_files`` logs were requeued (when it is truthy).
+    """
+    requeued = 0
+    for candidate in _get_parsing_logs(collections):
+        # The log's date is read from its validation data, not from a DB filter.
+        log_date = _extract_date_from_validation_dict(candidate.validation)
+        in_window = _is_log_date_inside_range(log_date, from_date_obj, until_date_obj)
+        if in_window and is_stale_parsing_log(
+            candidate, stale_after_minutes=stale_after_minutes
+        ):
+            requeue_stale_parsing_log(candidate)
+            requeued += 1
+            if max_log_files and requeued >= max_log_files:
+                break
+    return requeued
+
+
+def _get_parsing_logs(collections):
+    """Return PARSING log files, optionally limited to ``collections`` acronyms."""
+    criteria = {"status": choices.LOG_FILE_STATUS_PARSING}
+    if collections:
+        criteria["collection__acron3__in"] = collections
+    parsing_logs = LogFile.objects.filter(**criteria).select_related("collection")
+
+    # Ordered by the validation ``probably_date`` value, then path and hash.
+    return parsing_logs.order_by("validation__probably_date", "path", "hash")
+
+
+def _is_log_date_inside_range(probably_date, from_date_obj, until_date_obj):
+    return bool(probably_date) and from_date_obj <= probably_date <= until_date_obj
+
+
+def _enqueue_log_parsing_retry(
+    log_parsing_task,
+    collections,
+    batch_size,
+    track_errors,
+    from_date,
+    until_date,
+    max_log_files,
+    queue_name,
+    user_id,
+    username,
+    robots_source,
+):
+    """Enqueue one parsing run (errored logs included) for the given date range."""
+    task_kwargs = {
+        "collections": collections,
+        "include_logs_with_error": True,
+        "batch_size": batch_size,
+        "max_log_files": max_log_files,
+        "auto_reexecute": False,
+        "replace": False,
+        "track_errors": track_errors,
+        "from_date": from_date,
+        "until_date": until_date,
+        # Explicit dates are passed, so no relative look-back is requested.
+        "days_to_go_back": None,
+        "queue_name": queue_name,
+        "user_id": user_id,
+        "username": username,
+        "robots_source": robots_source,
+    }
+    # Only pass ``queue`` when one was requested; otherwise leave routing alone.
+    routing = {"queue": queue_name} if queue_name else {}
+    log_parsing_task.apply_async(kwargs=task_kwargs, **routing)
+
+
+def _extract_date_from_validation_dict(validation):
+    """Parse ``validation["probably_date"]`` (``%Y-%m-%d``); ``None`` on failure."""
+    try:
+        return get_date_obj(validation.get("probably_date"), "%Y-%m-%d")
+    except Exception as exc:
+        logging.error("Failed to extract date from validation: %s", exc)
+        return None
diff --git a/metrics/tasks/__init__.py b/metrics/tasks/__init__.py
index f0c2d6a..e69de29 100644
--- a/metrics/tasks/__init__.py
+++ b/metrics/tasks/__init__.py
@@ -1,19 +0,0 @@
-from .parse import (
-    task_parse_logs,
-    task_wait_parse_logs_wave,
-)
-from .process import (
-    task_process_daily_metric_job,
-)
-from .resume import (
-    task_resume_log_exports,
-    task_resume_stale_parsing_logs,
-)
-from .index import (
-    task_create_index,
-    task_delete_index,
-    task_delete_documents_by_key,
-)
-from .cleanup import (
-    task_cleanup_daily_payloads,
-)
diff --git a/metrics/tasks/daily_metric_exports.py b/metrics/tasks/daily_metric_exports.py
new file mode 100644
index 0000000..5bd8a2f
--- /dev/null
+++ b/metrics/tasks/daily_metric_exports.py
@@ -0,0 +1,22 @@
+from django.utils.translation import gettext as _
+
+from config import celery_app
+from core.utils.request_utils import _get_user
+from metrics.services.daily_metric_exports import build_and_export_daily_metric_job
+
+
+@celery_app.task(bind=True, name=_("[Metrics] Process Daily Job"), timelimit=-1)
+def task_build_and_export_daily_metric_job(
+    self, job_id, track_errors=False, user_id=None, username=None, robots_source=None
+):
+    """Celery entry point that builds and exports one daily metric job.
+
+    ``username``/``user_id`` are only passed to ``_get_user``; the remaining
+    arguments are forwarded to ``build_and_export_daily_metric_job``, whose
+    result is returned.
+    """
+    # The looked-up user is not used further in this task.
+    _get_user(self.request, username=username, user_id=user_id)
+    return build_and_export_daily_metric_job(
+        job_id=job_id, track_errors=track_errors, robots_source=robots_source
+    )
diff --git a/metrics/tasks/index.py b/metrics/tasks/index.py
index 2635377..eea151f 100644
--- a/metrics/tasks/index.py
+++ b/metrics/tasks/index.py
@@ -4,14 +4,13 @@
 
 from config import celery_app
 from core.utils.request_utils import _get_user
-
-from metrics.services.resources import build_search_client
+from metrics.opensearch.client import OpenSearchUsageClient
 
 
 @celery_app.task(bind=True, name=_("[Metrics] Create Index"), timelimit=-1)
 def task_create_index(self, index_name, mappings=None, user_id=None, username=None):
     _get_user(self.request, username=username, user_id=user_id)
-    search_client = build_search_client()
+    search_client = OpenSearchUsageClient()
 
     try:
         if search_client.client.indices.exists(index=index_name):
@@ -24,26 +23,10 @@ def task_create_index(self, index_name, mappings=None, user_id=None, username=No
         logging.error("Failed to create index %s: %s", index_name, exc)
 
 
-@celery_app.task(bind=True, name=_("[Metrics] Delete Index"), timelimit=-1)
-def task_delete_index(self, index_name, user_id=None, username=None):
-    _get_user(self.request, username=username, user_id=user_id)
-    search_client = build_search_client()
-
-    try:
-        if not search_client.client.indices.exists(index=index_name):
-            logging.info("Index %s does not exist.", index_name)
-            return
-
-        search_client.delete_index(index_name=index_name)
-        logging.info("Index %s deleted successfully.", index_name)
-    except Exception as exc:
-        logging.error("Failed to delete index %s: %s", index_name, exc)
-
-
 @celery_app.task(bind=True, name=_("[Metrics] Delete Documents by Key"), timelimit=-1)
 def task_delete_documents_by_key(self, index_name, data, user_id=None, username=None):
     _get_user(self.request, username=username, user_id=user_id)
-    search_client = build_search_client()
+    search_client = OpenSearchUsageClient()
 
     try:
         search_client.delete_documents_by_key(index_name=index_name, data=data)
diff --git a/metrics/tasks/log_parsing.py b/metrics/tasks/log_parsing.py
new file mode 100644
index 0000000..f9c45cb
--- /dev/null
+++ b/metrics/tasks/log_parsing.py
@@ -0,0 +1,99 @@
+from django.utils.translation import gettext as _
+
+from config import celery_app
+from metrics.services import log_parsing_jobs
+from metrics.tasks.daily_metric_exports import task_build_and_export_daily_metric_job
+
+
+@celery_app.task(
+    bind=True, name=_("[Log Pipeline] 3. Parse Logs (Manual)"), timelimit=-1
+)
+def task_enqueue_log_parsing_jobs(
+    self,
+    collections=None,
+    include_logs_with_error=True,
+    batch_size=5000,
+    max_log_files=None,
+    auto_reexecute=False,
+    replace=False,
+    track_errors=False,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    queue_name=None,
+    user_id=None,
+    username=None,
+    skip_log_hashes=None,
+    robots_source=None,
+):
+    """Celery entry point delegating to ``log_parsing_jobs``; rejects ``replace``."""
+    if replace:
+        raise ValueError(
+            "replace=True is not supported. Recompute requires deleting/recreating "
+            "the affected day or period first."
+        )
+    return log_parsing_jobs.enqueue_log_parsing_jobs(
+        daily_metric_export_task=task_build_and_export_daily_metric_job,
+        wait_log_parsing_wave_task=task_wait_log_parsing_wave,
+        collections=collections,
+        include_logs_with_error=include_logs_with_error,
+        batch_size=batch_size,
+        max_log_files=max_log_files,
+        auto_reexecute=auto_reexecute,
+        replace=replace,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        skip_log_hashes=skip_log_hashes,
+        robots_source=robots_source,
+    )
+
+
+@celery_app.task(bind=True, name=_("[Metrics] Wait Parse Logs Wave"), timelimit=-1)
+def task_wait_log_parsing_wave(  # Celery wrapper; the logic is in log_parsing_jobs
+    self,
+    wave_job_ids=None,
+    collections=None,
+    include_logs_with_error=True,
+    batch_size=5000,
+    max_log_files=None,
+    auto_reexecute=False,
+    replace=False,
+    track_errors=False,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+    queue_name=None,
+    user_id=None,
+    username=None,
+    skip_log_hashes=None,
+    poll_interval_seconds=log_parsing_jobs.AUTO_REEXECUTE_POLL_INTERVAL_SECONDS,
+    robots_source=None,
+    wave_log_hashes=None,  # presumably a legacy alias of wave_job_ids; verify
+):
+    return log_parsing_jobs.wait_log_parsing_wave(  # every argument is forwarded
+        log_parsing_task=task_enqueue_log_parsing_jobs,
+        wait_log_parsing_wave_task=task_wait_log_parsing_wave,  # this very task
+        wave_job_ids=wave_job_ids,
+        collections=collections,
+        include_logs_with_error=include_logs_with_error,
+        batch_size=batch_size,
+        max_log_files=max_log_files,
+        auto_reexecute=auto_reexecute,
+        replace=replace,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        skip_log_hashes=skip_log_hashes,
+        poll_interval_seconds=poll_interval_seconds,
+        robots_source=robots_source,
+        wave_log_hashes=wave_log_hashes,
+    )
diff --git a/metrics/tasks/parse.py b/metrics/tasks/parse.py
deleted file mode 100644
index ad3398c..0000000
--- a/metrics/tasks/parse.py
+++ /dev/null
@@ -1,295 +0,0 @@
-import logging
-
-from django.utils.translation import gettext as _
-
-from config import celery_app
-from core.utils.date_utils import get_date_obj, get_date_range_str
-from core.utils.request_utils import _get_user
-from collection.models import Collection
-from log_manager import choices
-from log_manager.models import LogFile
-from metrics.models import DailyMetricJob
-
-from metrics.services.resources import extract_celery_queue_name, get_log_files_for_collection_date
-from metrics.services.jobs import create_or_update_daily_metric_job
-from metrics.tasks.process import task_process_daily_metric_job
-
-AUTO_REEXECUTE_POLL_INTERVAL_SECONDS = 30
-
-
-@celery_app.task(bind=True, name=_("[Log Pipeline] 3. Parse Logs (Manual)"), timelimit=-1)
-def task_parse_logs(
-    self,
-    collections=None,
-    include_logs_with_error=True,
-    batch_size=5000,
-    max_log_files=None,
-    auto_reexecute=False,
-    replace=False,
-    track_errors=False,
-    from_date=None,
-    until_date=None,
-    days_to_go_back=None,
-    queue_name=None,
-    user_id=None,
-    username=None,
-    skip_log_hashes=None,
-    robots_source=None,
-):
-    if replace:
-        raise ValueError(
-            "replace=True is not supported. Recompute requires deleting/recreating "
-            "the affected day or period first."
-        )
-
-    from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back)
-    from_date_obj = get_date_obj(from_date)
-    until_date_obj = get_date_obj(until_date)
-    enqueued_jobs = 0
-    reached_max_log_files = False
-    enqueued_wave_job_ids = []
-    claimed_status_filters = list(_build_parse_status_filters(include_logs_with_error))
-    skip_log_hashes = set(skip_log_hashes or [])
-
-    for collection in collections or Collection.acron3_list():
-        collection_obj = Collection.objects.filter(acron3=collection).first()
-        if not collection_obj:
-            continue
-
-        access_dates = _find_access_dates(
-            collection=collection_obj,
-            from_date=from_date,
-            until_date=until_date,
-            from_date_obj=from_date_obj,
-            until_date_obj=until_date_obj,
-            status_filters=claimed_status_filters,
-            skip_log_hashes=skip_log_hashes,
-        )
-
-        for access_date in access_dates:
-            log_files = get_log_files_for_collection_date(
-                collection=collection_obj,
-                access_date=access_date,
-                status_filters=claimed_status_filters,
-            )
-            log_files = [log_file for log_file in log_files if log_file.hash not in skip_log_hashes]
-            if not log_files:
-                continue
-
-            job = create_or_update_daily_metric_job(
-                collection=collection_obj,
-                access_date=access_date,
-                log_files=log_files,
-            )
-            if job.status == DailyMetricJob.STATUS_EXPORTED:
-                continue
-
-            task_process_daily_metric_job.apply_async(
-                args=(job.pk, track_errors, user_id, username, robots_source),
-                queue=queue_name or extract_celery_queue_name(collection),
-            )
-            enqueued_wave_job_ids.append(job.pk)
-            enqueued_jobs += 1
-            if max_log_files and enqueued_jobs >= max_log_files:
-                reached_max_log_files = True
-                break
-
-        if reached_max_log_files:
-            break
-
-    auto_reexecution_enqueued = _schedule_parse_logs_reexecution(
-        should_reexecute=auto_reexecute and reached_max_log_files and bool(enqueued_wave_job_ids),
-        wave_job_ids=enqueued_wave_job_ids,
-        collections=collections,
-        include_logs_with_error=include_logs_with_error,
-        batch_size=batch_size,
-        max_log_files=max_log_files,
-        auto_reexecute=auto_reexecute,
-        replace=replace,
-        track_errors=track_errors,
-        from_date=from_date,
-        until_date=until_date,
-        days_to_go_back=days_to_go_back,
-        queue_name=queue_name,
-        user_id=user_id,
-        username=username,
-        skip_log_hashes=sorted(skip_log_hashes),
-        robots_source=robots_source,
-    )
-
-    return {
-        "enqueued_logs": enqueued_jobs,
-        "enqueued_jobs": enqueued_jobs,
-        "reached_max_log_files": reached_max_log_files,
-        "auto_reexecution_enqueued": auto_reexecution_enqueued,
-    }
-
-
-def _build_parse_status_filters(include_logs_with_error):
-    status_filters = [choices.LOG_FILE_STATUS_QUEUED]
-    if include_logs_with_error:
-        status_filters.append(choices.LOG_FILE_STATUS_ERROR)
-    return tuple(status_filters)
-
-
-def _find_access_dates(
-    collection,
-    from_date,
-    until_date,
-    from_date_obj,
-    until_date_obj,
-    status_filters,
-    skip_log_hashes,
-):
-    date_queryset = (
-        LogFile.objects.filter(
-            status__in=status_filters,
-            collection=collection,
-            date__gte=from_date_obj,
-            date__lte=until_date_obj,
-        )
-        .exclude(hash__in=skip_log_hashes)
-        .values_list("date", flat=True)
-        .distinct()
-        .order_by("date")
-    )
-
-    access_dates = set()
-    for value in list(date_queryset):
-        access_date = value if hasattr(value, "isoformat") else get_date_obj(value)
-        if access_date and from_date_obj <= access_date <= until_date_obj:
-            access_dates.add(access_date)
-    return sorted(access_dates)
-
-
-def _schedule_parse_logs_reexecution(
-    should_reexecute,
-    wave_job_ids,
-    collections,
-    include_logs_with_error,
-    batch_size,
-    max_log_files,
-    auto_reexecute,
-    replace,
-    track_errors,
-    from_date,
-    until_date,
-    days_to_go_back,
-    queue_name,
-    user_id,
-    username,
-    skip_log_hashes,
-    robots_source=None,
-):
-    if not should_reexecute:
-        return False
-
-    kwargs = {
-        "wave_job_ids": wave_job_ids,
-        "collections": collections,
-        "include_logs_with_error": include_logs_with_error,
-        "batch_size": batch_size,
-        "max_log_files": max_log_files,
-        "auto_reexecute": auto_reexecute,
-        "replace": replace,
-        "track_errors": track_errors,
-        "from_date": from_date,
-        "until_date": until_date,
-        "days_to_go_back": days_to_go_back,
-        "queue_name": queue_name,
-        "user_id": user_id,
-        "username": username,
-        "skip_log_hashes": skip_log_hashes,
-        "poll_interval_seconds": AUTO_REEXECUTE_POLL_INTERVAL_SECONDS,
-    }
-    if robots_source is not None:
-        kwargs["robots_source"] = robots_source
-
-    apply_kwargs = {"kwargs": kwargs}
-    if queue_name:
-        apply_kwargs["queue"] = queue_name
-    task_wait_parse_logs_wave.apply_async(**apply_kwargs)
-    return True
-
-
-@celery_app.task(bind=True, name=_("[Metrics] Wait Parse Logs Wave"), timelimit=-1)
-def task_wait_parse_logs_wave(
-    self,
-    wave_job_ids=None,
-    collections=None,
-    include_logs_with_error=True,
-    batch_size=5000,
-    max_log_files=None,
-    auto_reexecute=False,
-    replace=False,
-    track_errors=False,
-    from_date=None,
-    until_date=None,
-    days_to_go_back=None,
-    queue_name=None,
-    user_id=None,
-    username=None,
-    skip_log_hashes=None,
-    poll_interval_seconds=AUTO_REEXECUTE_POLL_INTERVAL_SECONDS,
-    robots_source=None,
-    wave_log_hashes=None,
-):
-    wave_job_ids = wave_job_ids or wave_log_hashes or []
-    if DailyMetricJob.objects.filter(
-        pk__in=wave_job_ids,
-        status__in=[DailyMetricJob.STATUS_PENDING, DailyMetricJob.STATUS_EXPORTING],
-    ).exists():
-        kwargs = {
-            "wave_job_ids": wave_job_ids,
-            "collections": collections,
-            "include_logs_with_error": include_logs_with_error,
-            "batch_size": batch_size,
-            "max_log_files": max_log_files,
-            "auto_reexecute": auto_reexecute,
-            "replace": replace,
-            "track_errors": track_errors,
-            "from_date": from_date,
-            "until_date": until_date,
-            "days_to_go_back": days_to_go_back,
-            "queue_name": queue_name,
-            "user_id": user_id,
-            "username": username,
-            "skip_log_hashes": skip_log_hashes,
-            "poll_interval_seconds": poll_interval_seconds,
-        }
-        if robots_source is not None:
-            kwargs["robots_source"] = robots_source
-
-        apply_kwargs = {
-            "kwargs": kwargs,
-            "countdown": poll_interval_seconds,
-        }
-        if queue_name:
-            apply_kwargs["queue"] = queue_name
-        task_wait_parse_logs_wave.apply_async(**apply_kwargs)
-        return {"wave_completed": False, "reexecution_enqueued": False}
-
-    kwargs = {
-        "collections": collections,
-        "include_logs_with_error": include_logs_with_error,
-        "batch_size": batch_size,
-        "max_log_files": max_log_files,
-        "auto_reexecute": auto_reexecute,
-        "replace": replace,
-        "track_errors": track_errors,
-        "from_date": from_date,
-        "until_date": until_date,
-        "days_to_go_back": days_to_go_back,
-        "queue_name": queue_name,
-        "user_id": user_id,
-        "username": username,
-        "skip_log_hashes": skip_log_hashes,
-    }
-    if robots_source is not None:
-        kwargs["robots_source"] = robots_source
-
-    apply_kwargs = {"kwargs": kwargs}
-    if queue_name:
-        apply_kwargs["queue"] = queue_name
-    task_parse_logs.apply_async(**apply_kwargs)
-    return {"wave_completed": True, "reexecution_enqueued": True}
diff --git a/metrics/tasks/process.py b/metrics/tasks/process.py
deleted file mode 100644
index ecdc7a5..0000000
--- a/metrics/tasks/process.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import logging
-
-from django.utils.translation import gettext as _
-
-from config import celery_app
-from core.utils.request_utils import _get_user
-from metrics.models import DailyMetricJob
-
-from metrics.services.jobs import acquire_daily_metric_job, mark_daily_metric_job_exported, mark_daily_metric_job_failed
-from metrics.services.export import export_daily_metric_payload, load_daily_metric_payload
-from metrics.services.resources import build_search_client, fetch_required_resources
-from metrics.services.parser import process_daily_metric_job
-
-
-@celery_app.task(bind=True, name=_("[Metrics] Process Daily Job"), timelimit=-1)
-def task_process_daily_metric_job(
-    self,
-    job_id,
-    track_errors=False,
-    user_id=None,
-    username=None,
-    robots_source=None,
-):
-    user = _get_user(self.request, username=username, user_id=user_id)
-
-    try:
-        job = acquire_daily_metric_job(job_id)
-    except DailyMetricJob.DoesNotExist:
-        logging.error("Daily metric job %s does not exist.", job_id)
-        return
-
-    if not job:
-        return
-
-    try:
-        payload = load_daily_metric_payload(job)
-        if payload is None or not job.payload_hash:
-            robots_list, mmdb = fetch_required_resources(robot_source=robots_source)
-            if not robots_list or not mmdb:
-                raise RuntimeError("Required parsing resources are not available.")
-            payload = process_daily_metric_job(
-                job=job,
-                robots_list=robots_list,
-                mmdb=mmdb,
-                track_errors=track_errors,
-            )
-            job.refresh_from_db()
-
-        search_client = build_search_client()
-        if not search_client.ping():
-            raise RuntimeError("OpenSearch client is not available.")
-
-        export_daily_metric_payload(
-            search_client=search_client,
-            job=job,
-            payload=payload,
-        )
-    except Exception as exc:
-        logging.error("Failed to process daily metric job %s: %s", job_id, exc)
-        mark_daily_metric_job_failed(job, exc)
-        return
-
-    mark_daily_metric_job_exported(job, user=user)
diff --git a/metrics/tasks/resume.py b/metrics/tasks/resume.py
index c0fe705..cd1c76a 100644
--- a/metrics/tasks/resume.py
+++ b/metrics/tasks/resume.py
@@ -1,22 +1,10 @@
-import logging
-
-from django.utils import timezone
 from django.utils.translation import gettext as _
 
 from config import celery_app
-from core.utils.date_utils import get_date_obj, get_date_range_str
 from core.utils.request_utils import _get_user
-from log_manager import choices
-from log_manager.models import LogFile
-from metrics.models import DailyMetricJob
-
-from metrics.services.jobs import create_or_update_daily_metric_job, release_stale_daily_metric_jobs
-from metrics.services.resources import extract_celery_queue_name, get_log_files_for_collection_date
-from metrics.services.parser import is_stale_parsing_log, requeue_stale_parsing_log
-from metrics.counter import parser
-
-from .parse import task_parse_logs
-from .process import task_process_daily_metric_job
+from metrics.services import resume
+from metrics.tasks.daily_metric_exports import task_build_and_export_daily_metric_job
+from metrics.tasks.log_parsing import task_enqueue_log_parsing_jobs
 
 
 @celery_app.task(bind=True, name=_("[Metrics] Resume Log Exports"), timelimit=-1)
@@ -33,69 +21,18 @@ def task_resume_log_exports(
     robots_source=None,
 ):
     _get_user(self.request, username=username, user_id=user_id)
-
-    from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back)
-    from_date_obj = get_date_obj(from_date)
-    until_date_obj = get_date_obj(until_date)
-
-    released_stale_jobs = release_stale_daily_metric_jobs(
+    return resume.resume_daily_metric_jobs(
+        daily_metric_export_task=task_build_and_export_daily_metric_job,
         collections=collections,
-        from_date=from_date_obj,
-        until_date=until_date_obj,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
         stale_after_minutes=stale_after_minutes,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        robots_source=robots_source,
     )
-    queryset = DailyMetricJob.objects.filter(
-        status__in=[DailyMetricJob.STATUS_PENDING, DailyMetricJob.STATUS_ERROR],
-        access_date__gte=from_date_obj,
-        access_date__lte=until_date_obj,
-    ).select_related("collection").order_by("access_date", "collection__acron3")
-    if collections:
-        queryset = queryset.filter(collection__acron3__in=collections)
-
-    resumed_jobs = 0
-    for job in queryset:
-        log_files = get_log_files_for_collection_date(
-            collection=job.collection,
-            access_date=job.access_date,
-            status_filters=[
-                choices.LOG_FILE_STATUS_QUEUED,
-                choices.LOG_FILE_STATUS_ERROR,
-            ],
-        )
-        if log_files:
-            job = create_or_update_daily_metric_job(
-                collection=job.collection,
-                access_date=job.access_date,
-                log_files=log_files,
-            )
-        elif not (job.storage_path and job.payload_hash):
-            logging.warning(
-                "Skipping daily metric job %s: no queued/error logs or stored payload.",
-                job.pk,
-            )
-            continue
-
-        if job.status == DailyMetricJob.STATUS_EXPORTED:
-            continue
-
-        task_process_daily_metric_job.apply_async(
-            args=(job.pk, False, user_id, username, robots_source),
-            queue=queue_name or extract_celery_queue_name(job.collection.acron3),
-        )
-        resumed_jobs += 1
-
-    logging.info(
-        "Resumed daily metric jobs for %s day(s); released %s stale job(s) at %s.",
-        resumed_jobs,
-        released_stale_jobs,
-        timezone.now(),
-    )
-    return {
-        "resumed_logs": resumed_jobs,
-        "resumed_jobs": resumed_jobs,
-        "released_stale_batches": released_stale_jobs,
-        "released_stale_jobs": released_stale_jobs,
-    }
 
 
 @celery_app.task(bind=True, name=_("[Metrics] Resume Stale Parsing Logs"), timelimit=-1)
@@ -114,53 +51,18 @@ def task_resume_stale_parsing_logs(
     username=None,
     robots_source=None,
 ):
-    from_date, until_date = get_date_range_str(from_date, until_date, days_to_go_back)
-    from_date_obj = get_date_obj(from_date)
-    until_date_obj = get_date_obj(until_date)
-
-    queryset = (
-        LogFile.objects.filter(status=choices.LOG_FILE_STATUS_PARSING)
-        .select_related("collection")
-        .order_by("validation__probably_date", "path", "hash")
+    return resume.resume_stale_parsing_logs(
+        log_parsing_task=task_enqueue_log_parsing_jobs,
+        collections=collections,
+        batch_size=batch_size,
+        track_errors=track_errors,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
+        stale_after_minutes=stale_after_minutes,
+        max_log_files=max_log_files,
+        queue_name=queue_name,
+        user_id=user_id,
+        username=username,
+        robots_source=robots_source,
     )
-    if collections:
-        queryset = queryset.filter(collection__acron3__in=collections)
-
-    resumed_logs = 0
-    for log_file in queryset:
-        probably_date = parser.extract_date_from_validation_dict(log_file.validation)
-        if not probably_date or probably_date < from_date_obj or probably_date > until_date_obj:
-            continue
-        if not is_stale_parsing_log(log_file, stale_after_minutes=stale_after_minutes):
-            continue
-
-        requeue_stale_parsing_log(log_file)
-        resumed_logs += 1
-        if max_log_files and resumed_logs >= max_log_files:
-            break
-
-    apply_kwargs = {
-        "kwargs": {
-            "collections": collections,
-            "include_logs_with_error": True,
-            "batch_size": batch_size,
-            "max_log_files": max_log_files,
-            "auto_reexecute": False,
-            "replace": False,
-            "track_errors": track_errors,
-            "from_date": from_date,
-            "until_date": until_date,
-            "days_to_go_back": None,
-            "queue_name": queue_name,
-            "user_id": user_id,
-            "username": username,
-            "robots_source": robots_source,
-        }
-    }
-    if queue_name:
-        apply_kwargs["queue"] = queue_name
-    task_parse_logs.apply_async(**apply_kwargs)
-    return {
-        "stale_logs_marked_for_retry": resumed_logs,
-        "parse_logs_enqueued": True,
-    }
diff --git a/metrics/tests/conftest.py b/metrics/tests/conftest.py
new file mode 100644
index 0000000..0260026
--- /dev/null
+++ b/metrics/tests/conftest.py
@@ -0,0 +1,60 @@
+from datetime import date
+from pathlib import Path
+
+import pytest
+
+from collection.models import Collection
+from log_manager import choices
+from log_manager.models import LogFile
+
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+
+
+@pytest.fixture
+def books_collection(db):
+    return Collection.objects.create(acron3="books", acron2="bk")
+
+
+@pytest.fixture
+def scl_collection(db):
+    return Collection.objects.create(acron3="scl", acron2="sc")
+
+
+@pytest.fixture
+def preprints_collection(db):
+    return Collection.objects.create(acron3="preprints", acron2="pp")
+
+
+@pytest.fixture
+def data_collection(db):
+    return Collection.objects.create(acron3="data", acron2="dt")
+
+
+@pytest.fixture
+def robots_list():
+    path = FIXTURES_DIR / "counter-robots.txt"
+    return path.read_text().splitlines()
+
+
+@pytest.fixture
+def mmdb_data():
+    path = FIXTURES_DIR / "map.mmdb"
+    return path.read_bytes()
+
+
+@pytest.fixture
+def log_file_factory(db):
+    def _create(collection, hash_value, **kwargs):
+        defaults = {
+            "path": f"/tmp/{hash_value}.log.gz",
+            "stat_result": {},
+            "status": choices.LOG_FILE_STATUS_QUEUED,
+            "date": date(2024, 1, 15),
+            "validation": {"probably_date": "2024-01-15"},
+        }
+        defaults.update(kwargs)
+        return LogFile.objects.create(
+            collection=collection, hash=hash_value, **defaults
+        )
+
+    return _create
diff --git a/metrics/tests/counter/__init__.py b/metrics/tests/counter/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/counter/access/__init__.py b/metrics/tests/counter/access/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/counter/access/test_accumulation.py b/metrics/tests/counter/access/test_accumulation.py
new file mode 100644
index 0000000..ccf9044
--- /dev/null
+++ b/metrics/tests/counter/access/test_accumulation.py
@@ -0,0 +1,183 @@
+import unittest
+from datetime import datetime
+
+from scielo_usage_counter.values import (
+    CONTENT_TYPE_FULL_TEXT,
+    DEFAULT_SCIELO_ISSN,
+    MEDIA_FORMAT_HTML,
+    MEDIA_FORMAT_PDF,
+)
+
+from metrics.counter.access import accumulation
+
+
+class TestAccumulation(unittest.TestCase):
+    def _book_counter_access(self, **overrides):
+        base = {
+            "collection": "books",
+            "source_type": "book",
+            "source_id": "q7gtd",
+            "scielo_issn": DEFAULT_SCIELO_ISSN,
+            "pid_v2": None,
+            "pid_v3": None,
+            "pid_generic": "BOOK:Q7GTD",
+            "title_pid_generic": "BOOK:Q7GTD",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+            "publication_year": "2023",
+            "document_title": "Book Title",
+            "source_main_title": "Book Title",
+            "source_subject_area_capes": [],
+            "source_subject_area_wos": [],
+            "source_acronym": None,
+            "source_publisher_name": ["SciELO Books"],
+        }
+        base.update(overrides)
+        return base
+
+    def _line(self, **overrides):
+        base = {
+            "client_name": "browser",
+            "client_version": "1.0",
+            "ip_address": "127.0.0.1",
+            "country_code": "BR",
+            "local_datetime": datetime(2024, 1, 15, 10, 0, 5),
+        }
+        base.update(overrides)
+        return base
+
+    def test_stores_source_and_periods(self):
+        results = {}
+        accumulation.accumulate(results, self._book_counter_access(), self._line())
+
+        self.assertEqual(len(results), 1)
+        result = next(iter(results.values()))
+        self.assertEqual(result["source"]["source_type"], "book")
+        self.assertEqual(result["source"]["source_id"], "q7gtd")
+        self.assertEqual(result["source"]["main_title"], "Book Title")
+        self.assertEqual(result["access_date"], "2024-01-15")
+        self.assertEqual(result["access_month"], "202401")
+        self.assertEqual(result["access_year"], "2024")
+        self.assertEqual(result["access_country_code"], "BR")
+        self.assertEqual(result["content_language"], "en")
+        self.assertEqual(result["title_pid_generic"], "BOOK:Q7GTD")
+        self.assertEqual(result["document"], {"title": "Book Title"})
+        self.assertIn("user_session_id", result)
+
+    def test_rejects_invalid_local_datetime(self):
+        results = {}
+        with self.assertRaises(ValueError):
+            accumulation.accumulate(
+                results,
+                self._book_counter_access(),
+                self._line(local_datetime=None),
+            )
+        self.assertEqual(results, {})
+
+    def test_does_not_expand_book_into_segments(self):
+        results = {}
+        counter_access = self._book_counter_access(
+            source_id="c2248",
+            pid_generic="BOOK:C2248",
+            title_pid_generic="BOOK:C2248",
+            segment_pid_generics=[
+                "BOOK:C2248/CHAPTER:00",
+                "BOOK:C2248/CHAPTER:01",
+                "BOOK:C2248/CHAPTER:02",
+            ],
+            media_format=MEDIA_FORMAT_PDF,
+            media_language="pt",
+            publication_year="2018",
+            source_main_title="C2248 Book",
+        )
+        accumulation.accumulate(results, counter_access, self._line())
+        self.assertEqual(len(results), 1)
+        result = list(results.values())[0]
+        self.assertEqual(result["pid_generic"], "BOOK:C2248")
+
+    def test_double_click_filter_uses_url_bucket_for_same_item(self):
+        results = {}
+        counter_access = self._book_counter_access(
+            source_id="c2248",
+            pid_generic="BOOK:C2248/CHAPTER:03",
+            title_pid_generic="BOOK:C2248",
+            media_language="pt",
+            publication_year="2018",
+            source_main_title="C2248 Book",
+        )
+
+        accumulation.accumulate(
+            results,
+            counter_access,
+            self._line(
+                local_datetime=datetime(2024, 1, 15, 10, 0, 5),
+                url="/id/c2248/03",
+            ),
+        )
+        accumulation.accumulate(
+            results,
+            counter_access,
+            self._line(
+                local_datetime=datetime(2024, 1, 15, 10, 0, 20),
+                url="https://books.scielo.org/id/c2248/epub/03.html?x=1",
+            ),
+        )
+
+        raw = next(iter(results.values()))
+        self.assertEqual(
+            set(raw["click_timestamps_by_url"]),
+            {"/id/c2248/03", "/id/c2248/epub/03.html"},
+        )
+
+    def test_same_url_within_window_produces_single_url_bucket(self):
+        results = {}
+        counter_access = self._book_counter_access(
+            source_id="c2248",
+            pid_generic="BOOK:C2248/CHAPTER:03",
+            title_pid_generic="BOOK:C2248",
+            media_language="pt",
+            publication_year="2018",
+            source_main_title="C2248 Book",
+        )
+
+        accumulation.accumulate(
+            results,
+            counter_access,
+            self._line(
+                local_datetime=datetime(2024, 1, 15, 10, 0, 5),
+                url="/id/c2248/03?from=search",
+            ),
+        )
+        accumulation.accumulate(
+            results,
+            counter_access,
+            self._line(
+                local_datetime=datetime(2024, 1, 15, 10, 0, 20),
+                url="/id/c2248/03?from=search",
+            ),
+        )
+
+        raw = next(iter(results.values()))
+        self.assertEqual(
+            raw["click_timestamps_by_url"],
+            {"/id/c2248/03": {"00:05": 1, "00:20": 1}},
+        )
+
+    def test_generates_session_id_from_client_ip_datetime(self):
+        results = {}
+        accumulation.accumulate(results, self._book_counter_access(), self._line())
+        result = next(iter(results.values()))
+        self.assertEqual(
+            result["user_session_id"], "browser|1.0|127.0.0.1|2024-01-15|10"
+        )
+
+    def test_ipv6_address_is_accepted(self):
+        results = {}
+        accumulation.accumulate(
+            results,
+            self._book_counter_access(),
+            self._line(ip_address="2001:4860:7:1103::"),
+        )
+        result = next(iter(results.values()))
+        self.assertIn("2001:4860:7:1103::", result["user_session_id"])
diff --git a/metrics/tests/counter/access/test_extraction.py b/metrics/tests/counter/access/test_extraction.py
new file mode 100644
index 0000000..e89705c
--- /dev/null
+++ b/metrics/tests/counter/access/test_extraction.py
@@ -0,0 +1,208 @@
+import unittest
+
+from scielo_usage_counter.values import (
+    CONTENT_TYPE_ABSTRACT,
+    CONTENT_TYPE_FULL_TEXT,
+    DEFAULT_SCIELO_ISSN,
+    MEDIA_FORMAT_HTML,
+    MEDIA_FORMAT_PDF,
+)
+
+from metrics.counter.access import extraction
+
+
+class TestExtraction(unittest.TestCase):
+    def test_normalizes_source_fields_for_journal(self):
+        data = extraction.extract(
+            "scl",
+            {
+                "scielo_issn": "1234-5678",
+                "pid_v2": "S0102-67202020000100001",
+                "media_language": "en",
+                "media_format": MEDIA_FORMAT_PDF,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+                "publication_year": "2024",
+                "journal_main_title": "Journal Title",
+                "journal_subject_area_capes": ["Health Sciences"],
+                "journal_subject_area_wos": ["Medicine"],
+                "journal_acronym": "testjou",
+                "journal_publisher_name": ["SciELO"],
+            },
+        )
+
+        self.assertEqual(data["source_type"], "journal")
+        self.assertEqual(data["source_id"], "1234-5678")
+        self.assertEqual(data["source_main_title"], "Journal Title")
+        self.assertEqual(data["source_acronym"], "testjou")
+
+    def test_normalizes_source_fields_for_books(self):
+        data = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "q7gtd",
+                "document_type": "chapter",
+                "book_id": "q7gtd",
+                "book_title": "Book Title",
+                "title_pid_generic": "book:q7gtd",
+                "pid_generic": "book:q7gtd/chapter:03",
+                "media_language": "en",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+                "publication_year": "2023",
+            },
+        )
+
+        self.assertEqual(data["source_type"], "book")
+        self.assertEqual(data["source_id"], "q7gtd")
+        self.assertEqual(data["scielo_issn"], DEFAULT_SCIELO_ISSN)
+        self.assertEqual(data["source_main_title"], "Book Title")
+        self.assertEqual(data["title_pid_generic"], "BOOK:Q7GTD")
+
+    def test_preserves_access_url_and_free_to_read(self):
+        data = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "c2248",
+                "document_type": "book",
+                "book_id": "c2248",
+                "book_title": "Book Title",
+                "title_pid_generic": "book:c2248",
+                "pid_generic": "book:c2248",
+                "media_language": "pt",
+                "media_format": MEDIA_FORMAT_PDF,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+                "access_url": "/id/c2248/pdf/freitas-9788599662830.pdf",
+                "source_access_type": "free_to_read",
+            },
+        )
+
+        self.assertEqual(data["access_url"], "/id/c2248/pdf/freitas-9788599662830.pdf")
+        self.assertEqual(data["counter_access_type"], "Free_To_Read")
+
+    def test_tolerates_malformed_media_language(self):
+        data = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "q7gtd",
+                "document_type": "book",
+                "book_id": "q7gtd",
+                "pid_generic": "book:q7gtd",
+                "media_language": "'",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+
+        self.assertEqual(data["media_language"], "un")
+
+    def test_sets_document_title_by_type(self):
+        chapter = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "q7gtd",
+                "document_type": "chapter",
+                "book_id": "q7gtd",
+                "chapter_id": "03",
+                "pid_generic": "book:q7gtd/chapter:03",
+                "book_title": "Book Title",
+                "chapter_title": "Chapter Title",
+                "media_format": MEDIA_FORMAT_HTML,
+                "media_language": "en",
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+        book = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "q7gtd",
+                "document_type": "book",
+                "book_id": "q7gtd",
+                "pid_generic": "book:q7gtd",
+                "book_title": "Book Title",
+                "media_format": MEDIA_FORMAT_HTML,
+                "media_language": "en",
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+        article = extraction.extract(
+            "scl",
+            {
+                "scielo_issn": "1234-5678",
+                "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+                "article_title": "Article Title",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+
+        self.assertEqual(chapter["document_title"], "Chapter Title")
+        self.assertEqual(book["document_title"], "Book Title")
+        self.assertEqual(article["document_title"], "Article Title")
+
+    def test_normalizes_collection_document_types(self):
+        preprint = extraction.extract(
+            "preprints",
+            {
+                "pid_generic": "10.1590/SciELOPreprints.1234",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+        dataset = extraction.extract(
+            "data",
+            {
+                "pid_generic": "10.48331/scielodata.abc123",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_ABSTRACT,
+            },
+        )
+        article = extraction.extract(
+            "scl",
+            {
+                "scielo_issn": "1234-5678",
+                "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+
+        self.assertEqual(preprint["source_type"], "preprint_server")
+        self.assertEqual(preprint["document_type"], "preprint")
+        self.assertEqual(dataset["source_type"], "data_repository")
+        self.assertEqual(dataset["document_type"], "dataset")
+        self.assertEqual(article["source_type"], "journal")
+        self.assertEqual(article["document_type"], "article")
+
+    def test_empty_or_none_translated_url_returns_empty_dict(self):
+        self.assertEqual(extraction.extract("scl", None), {})
+        self.assertEqual(extraction.extract("scl", {}), {})
+
+    def test_counter_access_type_defaults_to_open(self):
+        data = extraction.extract(
+            "scl",
+            {
+                "scielo_issn": "1234-5678",
+                "pid_v3": "abc123",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            },
+        )
+        self.assertEqual(data["counter_access_type"], "Open")
+
+    def test_commercial_access_type_maps_to_controlled(self):
+        data = extraction.extract(
+            "scl",
+            {
+                "scielo_issn": "1234-5678",
+                "pid_v3": "abc123",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+                "source_access_type": "commercial",
+            },
+        )
+        self.assertEqual(data["counter_access_type"], "Controlled")
diff --git a/metrics/tests/counter/access/test_validation.py b/metrics/tests/counter/access/test_validation.py
new file mode 100644
index 0000000..83f030c
--- /dev/null
+++ b/metrics/tests/counter/access/test_validation.py
@@ -0,0 +1,169 @@
+import unittest
+
+from scielo_usage_counter.values import (
+    CONTENT_TYPE_ABSTRACT,
+    CONTENT_TYPE_FULL_TEXT,
+    CONTENT_TYPE_UNDEFINED,
+    DEFAULT_SCIELO_ISSN,
+    MEDIA_FORMAT_HTML,
+    MEDIA_FORMAT_PDF,
+    MEDIA_FORMAT_UNDEFINED,
+)
+
+from metrics.counter.access import validation
+
+
+class TestValidation(unittest.TestCase):
+    def test_valid_journal_access(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_PDF,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertTrue(result)
+
+    def test_valid_book_source(self):
+        data = {
+            "source_type": "book",
+            "source_id": "q7gtd",
+            "scielo_issn": DEFAULT_SCIELO_ISSN,
+            "pid_generic": "BOOK:Q7GTD",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertTrue(result)
+
+    def test_undefined_media_format_is_invalid(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_UNDEFINED,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_undefined_content_type_is_invalid(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_PDF,
+            "content_type": CONTENT_TYPE_UNDEFINED,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_missing_all_pids_is_invalid(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "",
+            "pid_v3": "",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_PDF,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_html_format_is_valid(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertTrue(result)
+
+    def test_abstract_content_type_is_valid(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_PDF,
+            "content_type": CONTENT_TYPE_ABSTRACT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertTrue(result)
+
+    def test_dataset_without_source_or_language_is_valid(self):
+        data = {
+            "document_type": "dataset",
+            "scielo_issn": DEFAULT_SCIELO_ISSN,
+            "pid_v2": None,
+            "pid_v3": None,
+            "pid_generic": "DOI:10.48331/SCIELODATA.JLMAIY",
+            "media_language": "un",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_ABSTRACT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertTrue(result)
+
+    def test_missing_media_language_is_invalid(self):
+        data = {
+            "scielo_issn": "1234-5678",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "",
+            "media_format": MEDIA_FORMAT_PDF,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_missing_scielo_issn_for_article_is_invalid(self):
+        data = {
+            "scielo_issn": "",
+            "pid_v2": "S0102-67202020000100001",
+            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_PDF,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_preprint_requires_pid_generic(self):
+        data = {
+            "document_type": "preprint",
+            "pid_v2": None,
+            "pid_v3": "abc123",
+            "pid_generic": "",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_chapter_requires_source_id(self):
+        data = {
+            "document_type": "chapter",
+            "source_id": "",
+            "scielo_issn": DEFAULT_SCIELO_ISSN,
+            "pid_generic": "BOOK:Q7GTD/CHAPTER:03",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        result, _ = validation.is_valid(data)
+        self.assertFalse(result)
+
+    def test_non_dict_input_is_invalid(self):
+        result, check = validation.is_valid(None)
+        self.assertFalse(result)
+        self.assertEqual(check["code"], "invalid_format")
diff --git a/metrics/tests/counter/indexing/__init__.py b/metrics/tests/counter/indexing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/counter/indexing/test_converter.py b/metrics/tests/counter/indexing/test_converter.py
new file mode 100644
index 0000000..184e871
--- /dev/null
+++ b/metrics/tests/counter/indexing/test_converter.py
@@ -0,0 +1,472 @@
+import unittest
+
+from scielo_usage_counter.values import (
+    CONTENT_TYPE_ABSTRACT,
+    CONTENT_TYPE_FULL_TEXT,
+    DEFAULT_SCIELO_ISSN,
+    MEDIA_FORMAT_HTML,
+)
+
+from metrics.counter.indexing import converter as index_docs
+
+
+class TestConverter(unittest.TestCase):
+    def test_creates_month_and_year_views_for_book_chapter(self):
+        accumulated = {  # one record: a full-text click on chapter 03 of book q7gtd
+            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|browser|1.0|127.0.0.1|BR|en|html|full_text": dict(
+                collection="books",
+                source_key="q7gtd",
+                document_type="chapter",
+                pid_v2=None,
+                pid_v3=None,
+                pid_generic="BOOK:Q7GTD/CHAPTER:03",
+                document={"title": "Chapter Title"},
+                title_pid_generic="BOOK:Q7GTD",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:05": 1},
+                access_country_code="BR",
+                content_language="en",
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_month="202401",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="q7gtd",
+                    scielo_issn=DEFAULT_SCIELO_ISSN,
+                    main_title="Book Title",
+                    identifiers={"book_id": "q7gtd", "isbn": "9788578791889"},
+                    city="Sao Paulo",
+                    country="BR",
+                    subject_area_capes=[],
+                    subject_area_wos=[],
+                    acronym=None,
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2023",
+            )
+        }
+
+        views = index_docs.convert(accumulated)
+        self.assertEqual(set(views.keys()), {"month", "year"})
+        for view_name in ("month", "year"):  # an item doc plus a title doc in each
+            self.assertEqual(len(views[view_name]), 2)
+        # Month view, item scope: the chapter itself.
+        month_item = views["month"][
+            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|2024-01|Open|Regular|2023"
+        ]
+        self.assertEqual(month_item["access"], {"month": "2024-01"})
+        self.assertIn("daily_metrics", month_item)
+        for absent in ("access_country_code", "content_language"):
+            self.assertNotIn(absent, month_item)
+        item_doc = month_item["document"]
+        self.assertEqual(item_doc["id"], "BOOK:Q7GTD/CHAPTER:03")
+        self.assertEqual(item_doc["type"], "chapter")
+        self.assertEqual(item_doc["title"], "Chapter Title")
+        self.assertEqual(item_doc["parent_id"], "BOOK:Q7GTD")
+        self.assertEqual(item_doc["publication_year"], "2023")
+        item_ids = item_doc["identifiers"]
+        self.assertEqual(item_ids["book_id"], "q7gtd")
+        self.assertEqual(item_ids["chapter_id"], "03")
+        self.assertEqual(item_ids["isbn"], "9788578791889")
+        self.assertNotIn("pid_generic", item_ids)
+        self.assertEqual(month_item["counter"]["metric_scope"], "item")
+        self.assertEqual(month_item["counter"]["data_type"], "Book_Segment")
+        self.assertEqual(month_item["total_requests"], 1)
+        self.assertEqual(month_item["unique_requests"], 1)
+        item_source = month_item["source"]
+        self.assertNotIn("scielo_issn", item_source)
+        self.assertNotIn("book_id", item_source.get("identifiers", {}))
+        self.assertEqual(item_source["publisher_name"], ["SciELO Books"])
+        # Month view, title scope: the whole book.
+        month_title = views["month"][
+            "title|books|q7gtd|||BOOK:Q7GTD|2024-01|Open|Regular|2023"
+        ]
+        self.assertEqual(month_title["document"]["id"], "BOOK:Q7GTD")
+        self.assertEqual(month_title["document"]["type"], "book")
+        self.assertEqual(month_title["document"]["title"], "Book Title")
+        self.assertNotIn("parent_id", month_title["document"])
+        self.assertEqual(month_title["counter"]["metric_scope"], "title")
+        self.assertEqual(month_title["counter"]["data_type"], "Book")
+        self.assertEqual(month_title["total_requests"], 1)
+        self.assertEqual(month_title["total_investigations"], 1)
+        self.assertEqual(month_title["unique_requests"], 1)
+        self.assertEqual(month_title["unique_investigations"], 1)
+        # Year view, item scope: access carries country and language, no daily data.
+        year_item = views["year"][
+            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|en|BR|2024|Open|Regular|2023"
+        ]
+        year_access = {"year": "2024", "country_code": "BR", "content_language": "en"}
+        self.assertEqual(year_item["access"], year_access)
+        self.assertNotIn("daily_metrics", year_item)
+        self.assertEqual(year_item["document"]["title"], "Chapter Title")
+        self.assertEqual(year_item["counter"]["metric_scope"], "item")
+        self.assertEqual(year_item["total_requests"], 1)
+        # Year view, title scope.
+        year_title = views["year"][
+            "title|books|q7gtd|||BOOK:Q7GTD|en|BR|2024|Open|Regular|2023"
+        ]
+        self.assertEqual(year_title["counter"]["metric_scope"], "title")
+        self.assertEqual(year_title["document"]["title"], "Book Title")
+        self.assertNotIn("daily_metrics", year_title)
+        self.assertEqual(year_title["total_requests"], 1)
+        self.assertEqual(year_title["total_investigations"], 1)
+        self.assertEqual(year_title["unique_requests"], 1)
+        self.assertEqual(year_title["unique_investigations"], 1)
+
+    def test_maps_counter_data_types_for_preprint_and_dataset(self):
+        accumulated = {
+            "preprints|scielo-preprints|||10.1590/SCIELOPREPRINTS.1234|sess|BR|un|html|full_text": dict(
+                collection="preprints",
+                source_key="scielo-preprints",
+                document_type="preprint",
+                pid_generic="10.1590/SCIELOPREPRINTS.1234",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:05": 1},
+                access_country_code="BR",
+                content_language="un",
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_year="2024",
+                source=dict(
+                    source_type="preprint_server",
+                    source_id="scielo-preprints",
+                    main_title="SciELO Preprints",
+                ),
+                publication_year="2024",
+            ),
+            "data|scielo-data|||10.48331/SCIELODATA.ABC123|sess|BR|un|html|abstract": dict(
+                collection="data",
+                source_key="scielo-data",
+                document_type="dataset",
+                pid_generic="10.48331/SCIELODATA.ABC123",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:05": 1},
+                access_country_code="BR",
+                content_language="un",
+                content_type=CONTENT_TYPE_ABSTRACT,
+                access_date="2024-01-15",
+                access_year="2024",
+                source=dict(
+                    source_type="data_repository",
+                    source_id="scielo-data",
+                    main_title="SciELO Data",
+                ),
+                publication_year="2024",
+            ),
+        }
+
+        months = index_docs.convert(accumulated)["month"]
+        preprint = months[
+            "preprints|scielo-preprints|||10.1590/SCIELOPREPRINTS.1234|2024-01|Open|Regular|2024"
+        ]
+        dataset = months[
+            "data|scielo-data|||10.48331/SCIELODATA.ABC123|2024-01|Open|Regular|2024"
+        ]
+        # Preprint -> data_type "Article" tagged "Preprint"; dataset has no version tag.
+        self.assertEqual(preprint["counter"]["data_type"], "Article")
+        self.assertEqual(preprint["document"]["type"], "preprint")
+        self.assertEqual(preprint["document"]["id"], "10.1590/SCIELOPREPRINTS.1234")
+        self.assertEqual(preprint["counter"]["article_version"], "Preprint")
+        self.assertEqual(dataset["counter"]["data_type"], "Dataset")
+        self.assertNotIn("article_version", dataset["counter"])
+
+    def test_dedupes_book_unique_item_across_formats(self):
+        accumulated = {
+            "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|html|full_text": dict(
+                collection="books",
+                source_key="c2248",
+                document_type="chapter",
+                pid_v2=None,
+                pid_v3=None,
+                pid_generic="BOOK:C2248/CHAPTER:03",
+                title_pid_generic="BOOK:C2248",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:05": 1},  # click recorded under the html key
+                access_country_code="BR",
+                content_language="pt",
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_month="202401",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="c2248",
+                    main_title="C2248 Book",
+                    identifiers={"book_id": "c2248", "isbn": "9788599662830"},
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2018",
+            ),
+            "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|pdf|full_text": dict(
+                collection="books",
+                source_key="c2248",
+                document_type="chapter",
+                pid_v2=None,
+                pid_v3=None,
+                pid_generic="BOOK:C2248/CHAPTER:03",
+                title_pid_generic="BOOK:C2248",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:45": 1},  # click recorded under the pdf key
+                access_country_code="BR",
+                content_language="pt",
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_month="202401",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="c2248",
+                    main_title="C2248 Book",
+                    identifiers={"book_id": "c2248", "isbn": "9788599662830"},
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2018",
+            ),
+        }
+
+        months = index_docs.convert(accumulated)["month"]
+        chapter = months[
+            "books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"
+        ]
+        book = months[
+            "title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"
+        ]
+        # Same session id on both records: totals count both hits, uniques count one.
+        for tally in ("total_requests", "total_investigations"):
+            self.assertEqual(chapter[tally], 2)
+        for tally in ("unique_requests", "unique_investigations"):
+            self.assertEqual(chapter[tally], 1)
+        self.assertEqual(book["unique_requests"], 1)
+        self.assertEqual(book["unique_investigations"], 1)
+
+    def test_skips_book_landing_page_from_item_scope(self):
+        accumulated = {
+            "books|c2248|||BOOK:C2248|sess|BR|pt|html|abstract": dict(
+                collection="books",
+                source_key="c2248",
+                document_type="book",
+                pid_v2=None,
+                pid_v3=None,
+                pid_generic="BOOK:C2248",
+                document={"title": "C2248 Book"},
+                title_pid_generic="BOOK:C2248",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:05": 1},
+                access_country_code="BR",
+                content_language="pt",
+                content_type=CONTENT_TYPE_ABSTRACT,
+                access_date="2024-01-15",
+                access_month="202401",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="c2248",
+                    main_title="C2248 Book",
+                    identifiers={"book_id": "c2248", "isbn": "9788599662830"},
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2018",
+            ),
+        }
+
+        views = index_docs.convert(accumulated)
+
+        # The only record is an abstract hit whose document_type is "book"; the
+        # converter is expected to emit title-scope documents only, with no
+        # item-scope entry in either view.
+        expected_month = {"title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"}
+        self.assertEqual(set(views["month"].keys()), expected_month)
+        expected_year = {"title|books|c2248|||BOOK:C2248|pt|BR|2024|Open|Regular|2018"}
+        self.assertEqual(set(views["year"].keys()), expected_year)
+
+    def test_whole_book_without_segments_counts_as_book_segment(self):
+        accumulated = {
+            "books|c2248|||BOOK:C2248|sess|BR|pt|pdf|full_text": dict(
+                collection="books",
+                source_key="c2248",
+                document_type="book",
+                pid_v2=None,
+                pid_v3=None,
+                pid_generic="BOOK:C2248",
+                document={"title": "C2248 Book"},
+                title_pid_generic="BOOK:C2248",
+                user_session_id="browser|1.0|127.0.0.1|2024-01-15|10",
+                click_timestamps={"00:05": 1},
+                access_country_code="BR",
+                content_language="pt",
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_month="202401",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="c2248",
+                    main_title="C2248 Book",
+                    identifiers={"book_id": "c2248"},
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2018",
+            ),
+        }
+
+        months = index_docs.convert(accumulated)["month"]
+        whole_book_item = months[
+            "books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"
+        ]
+        whole_book_title = months[
+            "title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"
+        ]
+        # A full-text hit on the book pid yields an item doc and a title doc.
+        self.assertEqual(whole_book_item["counter"]["data_type"], "Book_Segment")
+        self.assertEqual(whole_book_item["counter"]["metric_scope"], "item")
+        self.assertEqual(whole_book_item["document"]["id"], "BOOK:C2248")
+        self.assertNotIn("parent_id", whole_book_item["document"])
+        self.assertEqual(whole_book_title["counter"]["data_type"], "Book")
+        self.assertEqual(whole_book_title["counter"]["metric_scope"], "title")
+
+    def test_aggregates_multiple_chapters_at_title_level(self):
+        accumulated = {
+            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:01|session1|BR|en|html|full_text": dict(
+                collection="books",
+                source_key="q7gtd",
+                document_type="chapter",
+                pid_generic="BOOK:Q7GTD/CHAPTER:01",
+                title_pid_generic="BOOK:Q7GTD",
+                user_session_id="session1",
+                click_timestamps={"00:05": 1},
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="q7gtd",
+                    scielo_issn=DEFAULT_SCIELO_ISSN,
+                    main_title="Book Title",
+                    identifiers={"book_id": "q7gtd"},
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2023",
+            ),
+            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:02|session1|BR|en|html|full_text": dict(
+                collection="books",
+                source_key="q7gtd",
+                document_type="chapter",
+                pid_generic="BOOK:Q7GTD/CHAPTER:02",
+                title_pid_generic="BOOK:Q7GTD",
+                user_session_id="session1",
+                click_timestamps={"00:10": 1},
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_year="2024",
+                source=dict(
+                    source_type="book",
+                    source_id="q7gtd",
+                    scielo_issn=DEFAULT_SCIELO_ISSN,
+                    main_title="Book Title",
+                    identifiers={"book_id": "q7gtd"},
+                    publisher_name=["SciELO Books"],
+                ),
+                publication_year="2023",
+            ),
+        }
+
+        views = index_docs.convert(accumulated)
+        for view_name in ("month", "year"):  # three docs per view
+            self.assertEqual(len(views[view_name]), 3)
+        # Title level: both chapter hits add up, but one session counts once.
+        book = views["month"][
+            "title|books|q7gtd|||BOOK:Q7GTD|2024-01|Open|Regular|2023"
+        ]
+        for tally in ("total_requests", "total_investigations"):
+            self.assertEqual(book[tally], 2)
+        for tally in ("unique_requests", "unique_investigations"):
+            self.assertEqual(book[tally], 1)
+
+    def test_double_click_collapses_same_url_within_30_seconds(self):
+        from datetime import datetime
+
+        from metrics.counter.access import accumulation
+
+        # Same client, IP and URL requested twice, at 10:00:05 and 10:00:20;
+        # only "local_datetime" differs between the two accumulated lines.
+        accumulated = {}
+        access = dict(
+            collection="books",
+            source_type="book",
+            source_id="c2248",
+            scielo_issn=DEFAULT_SCIELO_ISSN,
+            pid_v2=None,
+            pid_v3=None,
+            pid_generic="BOOK:C2248/CHAPTER:03",
+            title_pid_generic="BOOK:C2248",
+            media_language="pt",
+            media_format=MEDIA_FORMAT_HTML,
+            content_type=CONTENT_TYPE_FULL_TEXT,
+            publication_year="2018",
+            source_main_title="C2248 Book",
+        )
+        log_line = dict(
+            client_name="browser",
+            client_version="1.0",
+            ip_address="127.0.0.1",
+            country_code="BR",
+            url="/id/c2248/03?from=search",
+        )
+
+        for second in (5, 20):
+            accumulation.accumulate(
+                accumulated,
+                access,
+                {**log_line, "local_datetime": datetime(2024, 1, 15, 10, 0, second)},
+            )
+
+        months = index_docs.convert(accumulated)["month"]
+        chapter = months[
+            "books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"
+        ]
+        # The repeated click must be counted once, both in total and in
+        # unique requests.
+        self.assertEqual(chapter["total_requests"], 1)
+        self.assertEqual(chapter["unique_requests"], 1)
+
+    def test_article_pipeline_sets_journal_parent(self):
+        accumulated = {
+            "scl|1234-5678||abc123||sess|BR|en|pdf|full_text": dict(
+                collection="scl",
+                source_key="1234-5678",
+                document_type="article",
+                pid_v2=None,
+                pid_v3="abc123",
+                pid_generic=None,
+                document={"title": "Article Title"},
+                user_session_id="sess",
+                click_timestamps={"00:05": 1},
+                access_country_code="BR",
+                content_language="en",
+                content_type=CONTENT_TYPE_FULL_TEXT,
+                access_date="2024-01-15",
+                access_year="2024",
+                source=dict(
+                    source_type="journal",
+                    source_id="1234-5678",
+                    scielo_issn="1234-5678",
+                    main_title="Test Journal",
+                ),
+                publication_year="2024",
+            )
+        }
+
+        views = index_docs.convert(accumulated)
+        month_docs = list(views["month"].values())
+        article = month_docs[0]  # first month-view document, whatever its key
+        self.assertEqual(article["counter"]["data_type"], "Article")
+        self.assertEqual(article["counter"]["parent_data_type"], "Journal")
+        self.assertEqual(article["counter"]["metric_scope"], "item")
+        self.assertEqual(article["document"]["type"], "article")
+        self.assertEqual(article["total_requests"], 1)
+        self.assertEqual(article["total_investigations"], 1)
+
+    def test_non_dict_input_returns_empty(self):
+        empty_views = {"month": {}, "year": {}}  # both views, no documents
+        self.assertEqual(index_docs.convert(None), empty_views)
diff --git a/metrics/tests/fixtures/counter-robots.txt b/metrics/tests/fixtures/counter-robots.txt
new file mode 100644
index 0000000..f206558
--- /dev/null
+++ b/metrics/tests/fixtures/counter-robots.txt
@@ -0,0 +1,307 @@
+bot
+^Buck\/[0-9]
+spider
+crawl
+^.?$
+[^a]fish
+^IDA$
+^ruby$
+^@ozilla\/\d
+^脝脝陆芒潞贸碌脛$
+^破解后的$
+AddThis
+A6-Indexer
+ADmantX
+alexa
+Alexandria(\s|\+)prototype(\s|\+)project
+AllenTrack
+almaden
+appie
+API[\+\s]scraper
+Arachni
+Arachmo
+architext
+ArchiveTeam
+aria2\/\d
+arks
+^Array$
+asterias
+atomz
+BDFetch
+Betsie
+baidu
+biglotron
+BingPreview
+binlar
+bjaaland
+Blackboard[\+\s]Safeassign
+blaiz-bee
+bloglines
+blogpulse
+boitho\.com-dc
+bookmark-manager
+Brutus\/AET
+BUbiNG
+bwh3_user_agent
+CakePHP
+celestial
+cfnetwork
+checklink
+checkprivacy
+China\sLocal\sBrowse\s2\.6
+Citoid
+cloakDetect
+coccoc\/1\.0
+Code\sSample\sWeb\sClient
+ColdFusion
+collection@infegy.com
+com\.plumanalytics
+combine
+contentmatch
+ContentSmartz
+convera
+core
+Cortana
+CoverScout
+crusty\/\d
+curl\/
+cursor
+custo
+DataCha0s\/2\.0
+daum(oa)?
+^\%?default\%?$
+DeuSu\/
+Dispatch\/\d
+Docoloc
+docomo
+Download\+Master
+Drupal
+DSurf
+DTS Agent
+EasyBib[\+\s]AutoCite[\+\s]
+easydl
+EBSCO\sEJS\sContent\sServer
+EcoSearch
+ELinks\/
+EmailSiphon
+EmailWolf
+Embedly
+EThOS\+\(British\+Library\)
+facebookexternalhit\/
+favorg
+FDM(\s|\+)\d
+Feedbin
+feedburner
+FeedFetcher
+feedreader
+ferret
+Fetch(\s|\+)API(\s|\+)Request
+findlinks
+findthatfile
+^FileDown$
+^Filter$
+^firefox$
+^FOCA
+Fulltext
+Funnelback
+Genieo
+GetRight
+geturl
+GigablastOpenSource
+G-i-g-a-b-o-t
+GLMSLinkAnalysis
+Goldfire(\s|\+)Server
+google
+Grammarly
+grub
+gulliver
+gvfs\/
+harvest
+heritrix
+holmes
+htdig
+htmlparser
+HttpComponents\/1.1
+HTTPFetcher
+http.?client
+httpget
+httrack
+ia_archiver
+ichiro
+iktomi
+ilse
+Indy Library
+^integrity\/\d
+internetseer
+intute
+iSiloX
+iskanie
+^java\/\d{1,2}.\d
+jeeves
+Jersey\/\d
+jobo
+kyluka
+larbin
+libcurl
+libhttp
+libwww
+lilina
+^LinkAnalyser
+link.?check
+LinkLint-checkonly
+^LinkParser\/
+^LinkSaver\/
+linkscan
+LinkTiger
+linkwalker
+lipperhey
+livejournal\.com
+LOCKSS
+LongURL.API
+ltx71
+lwp
+lycos[_+]
+mail\.ru
+MarcEdit
+mediapartners-google
+megite
+MetaURI[\+\s]API\/\d\.\d
+Microsoft(\s|\+)URL(\s|\+)Control
+Microsoft Office Existence Discovery
+Microsoft Office Protocol Discovery
+Microsoft-WebDAV-MiniRedir
+mimas
+mnogosearch
+moget
+motor
+^Mozilla$
+^Mozilla.4\.0$
+^Mozilla\/4\.0\+\(compatible;\)$
+^Mozilla\/4\.0\+\(compatible;\+ICS\)$
+^Mozilla\/4\.5\+\[en]\+\(Win98;\+I\)$
+^Mozilla.5\.0$
+^Mozilla\/5.0\+\(compatible;\+MSIE\+6\.0;\+Windows\+NT\+5\.0\)$
+^Mozilla\/5\.0\+like\+Gecko$
+^Mozilla\/5.0(\s|\+)Gecko\/20100115(\s|\+)Firefox\/3.6$
+^MSIE
+MuscatFerre
+myweb
+nagios
+^NetAnts\/\d
+netcraft
+netluchs
+newspaper\/\d
+ng\/2\.
+^Ning\/\d
+no_user_agent
+nomad
+nutch
+^oaDOI$
+ocelli
+Offline(\s|\+)Navigator
+OgScrper
+okhttp
+onetszukaj
+^Opera\/4$
+OurBrowser
+panscient
+parsijoo
+^Pattern\/\d
+Pcore-HTTP
+pear\.php\.net
+perman
+PHP\/
+pidcheck
+pioneer
+playmusic\.com
+playstarmusic\.com
+^Postgenomic(\s|\+)v2
+powermarks
+proximic
+PycURL
+python
+Qwantify
+rambler
+ReactorNetty\/\d
+Readpaper
+redalert
+Riddler
+robozilla
+rss
+scan4mail
+scientificcommons
+scirus
+scooter
+Scrapy\/\d
+ScoutJet
+^scrutiny\/\d
+SearchBloxIntra
+shoutcast
+Site24x7
+SkypeUriPreview
+slurp
+sogou
+speedy
+sqlmap
+SrceDAMP
+Strider
+summify
+sunrise
+Sysomos
+T\-H\-U\-N\-D\-E\-R\-S\-T\-O\-N\-E
+tailrank
+Teleport(\s|\+)Pro
+Teoma
+The\+Knowledge\+AI
+titan
+^Traackr\.com$
+Trello
+Trove
+Turnitin
+twiceler
+Typhoeus
+ucsd
+ultraseek
+^undefined$
+^unknown$
+Unpaywall
+URL2File
+urlaliasbuilder
+urllib
+^user.?agent$
+^User-Agent
+validator
+virus.detector
+voila
+^voltron$
+voyager\/
+w3af\.org
+Wanadoo
+Web(\s|\+)Downloader
+WebCloner
+webcollage
+WebCopier
+Webinator
+weblayers
+Webmetrics
+webmirror
+webmon
+weborama-fetcher
+webreaper
+WebStripper
+WebZIP
+Wget
+WhatsApp
+wordpress
+worm
+www\.gnip\.com
+WWW-Mechanize
+xenu
+y!j
+yacy
+yahoo
+yandex
+Yeti\/\d
+zeus
+zyborg
+7siters
diff --git a/metrics/tests/fixtures/map.mmdb b/metrics/tests/fixtures/map.mmdb
new file mode 100644
index 0000000..257d56e
Binary files /dev/null and b/metrics/tests/fixtures/map.mmdb differ
diff --git a/metrics/tests/fixtures/usage.books.log b/metrics/tests/fixtures/usage.books.log
new file mode 100644
index 0000000..103a339
--- /dev/null
+++ b/metrics/tests/fixtures/usage.books.log
@@ -0,0 +1,9 @@
+186.215.90.179 - - [01/Apr/2012:00:00:29 -0300] "GET /id/xjcw9 HTTP/1.1" 200 13833 "http://books.scielo.org/search/index.php" "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B176 Safari/7534.48.3"
+189.97.101.205 - - [01/Apr/2012:00:30:27 -0300] "GET /id/h8pyf/08 HTTP/1.1" 200 10775 "http://books.scielo.org/search/index.php" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; MANM)"
+189.100.12.229 - - [01/Apr/2012:00:00:00 -0300] "GET /id/3hs/pdf/sampaio-9788523206277.pdf HTTP/1.1" 200 1057116 "http://books.scielo.org/id/3hs" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.83 Safari/535.11"
+216.189.176.205 - - [01/Apr/2012:00:41:33 -0300] "GET /id/hd5d8/epub/gelamo-9788598605951.epub HTTP/1.1" 200 239376 "http://books.scielo.org/id/hd5d8" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4"
+MISS|200|1757894367|5117|4384504|2001:4860:7:1103::|https://www.google.com/|https://books.scielo.org/id/96spq|MI|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36|8e4f6321a936335906fccc9c9d9211af|BR
+MISS|200|1757894377|5899|4384504|213.135.156.0|-|https://books.scielo.org/id/3dqnm/10|DE|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36|0f9f9bf8c4fe32029f8e881aebaac4a1|RU
+MISS|200|1757894381|715390|4384504|45.56.186.0|-|http://books.scielo.org/id/htnbt/pdf/caldeira-9788579830419-10.pdf|IL|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Trailer/93.3.8652.5|713aed01fd0c98aa682b5afe4e646b2c|US
+MISS|200|1757894328|10148382|4384504|170.23.5.0|-|https://books.scielo.org/id/wg88m/epub/ortigoza-9788579831287.epub|IL|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.3|e0d3c7f5b005566d3dbec37db52f674e|US
+MISS|200|1757894338|1377|4384504|166.88.79.0|-|https://books.scielo.org/id/p8kpd/Text/12.xhtml|DE|Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.3|bc11c7eaf60775124f454befbce23cca|PL
diff --git a/metrics/tests/fixtures/usage.dat.log b/metrics/tests/fixtures/usage.dat.log
new file mode 100644
index 0000000..cf40425
--- /dev/null
+++ b/metrics/tests/fixtures/usage.dat.log
@@ -0,0 +1,30 @@
+20.171.206.17 - - [01/Sep/2024:23:59:07 -0300] "GET /dataset.xhtml;jsessionid=0212e6bc89c71a2c0d48a3f76451?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&fileSortField=date&tagPresort=false HTTP/1.1" 200 30851 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:09 -0300] "GET /dataset.xhtml;jsessionid=0212e6bc89c71a2c0d48a3f76451?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileAccess=&fileTag=&fileSortField=date&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30941 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+192.168.1.116 - - [01/Sep/2024:23:59:10 -0300] "GET /api/dataverses/preprints HTTP/1.1" 200 2659 "-" "ops/3.3.0.14"
+192.168.169.235 - - [01/Sep/2024:23:59:13 -0300] "GET / HTTP/1.1" 200 28444 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36"
+20.171.206.17 - - [01/Sep/2024:23:59:14 -0300] "GET /dataset.xhtml;jsessionid=0212e6bc89c71a2c0d48a3f76451?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=%22Text%22&fileAccess=&fileTag=&fileSortField=date&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 29558 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:16 -0300] "GET /dataset.xhtml;jsessionid=0212e6bc89c71a2c0d48a3f76451?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=name&fileSortOrder=desc&tagPresort=false&folderPresort=true HTTP/1.1" 200 30960 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+192.168.1.116 - - [01/Sep/2024:23:59:17 -0300] "GET /api/dataverses/preprints HTTP/1.1" 200 2659 "-" "ops/3.3.0.14"
+20.171.206.17 - - [01/Sep/2024:23:59:20 -0300] "GET /dataset.xhtml;jsessionid=0212e6bc89c71a2c0d48a3f76451?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=date&fileSortOrder=desc&tagPresort=false HTTP/1.1" 200 30950 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:22 -0300] "GET /dataset.xhtml;jsessionid=0212e6bc89c71a2c0d48a3f76451?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=%22Document%22&fileAccess=&fileTag=&fileSortField=date&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30711 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+192.168.1.116 - - [01/Sep/2024:23:59:25 -0300] "GET /api/dataverses/preprints HTTP/1.1" 200 2659 "-" "ops/3.3.0.14"
+20.171.206.17 - - [01/Sep/2024:23:59:25 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&tagPresort=false&folderPresort=true HTTP/1.1" 200 30818 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:27 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=type&tagPresort=false HTTP/1.1" 200 30950 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:30 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=size&tagPresort=false HTTP/1.1" 200 30956 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+43.159.146.48 - - [01/Sep/2024:23:59:31 -0300] "GET /dataset.xhtml;jsessionid=e0171f1137481ce453dd659be0ac?persistentId=doi%3A10.48331%2Fscielodata.C4HFUF&version=&q=&fileTypeGroupFacet=&fileAccess=&tagPresort=false&folderPresort=true HTTP/1.1" 200 29722 "https://google.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
+20.171.206.17 - - [01/Sep/2024:23:59:32 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=name&fileSortOrder=desc&tagPresort=false&folderPresort=true HTTP/1.1" 200 30956 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:34 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileAccess=&fileTag=&fileSortField=&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30919 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:36 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30915 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:38 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=date&tagPresort=false HTTP/1.1" 200 30947 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:40 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=Restricted&fileTag=&fileSortField=&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30852 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:42 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=date&fileSortOrder=desc&tagPresort=false HTTP/1.1" 200 30950 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:43 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=%22Document%22&fileAccess=&fileTag=&fileSortField=&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30696 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:45 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileTag=&fileSortField=&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30939 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:47 -0300] "GET /dataset.xhtml;jsessionid=02146668a92ebd3249bc567cefcc?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=%22Text%22&fileAccess=&fileTag=&fileSortField=&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 29570 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+192.168.169.235 - - [01/Sep/2024:23:59:47 -0300] "GET / HTTP/1.1" 200 28437 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36"
+20.171.206.17 - - [01/Sep/2024:23:59:49 -0300] "GET /dataset.xhtml;jsessionid=0214dc416f0afe817703dcf8337e?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=size&tagPresort=false HTTP/1.1" 200 30946 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:51 -0300] "GET /dataset.xhtml;jsessionid=0214dc416f0afe817703dcf8337e?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=%22Document%22&fileAccess=&fileTag=&fileSortField=size&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30704 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:53 -0300] "GET /dataset.xhtml;jsessionid=0214dc416f0afe817703dcf8337e?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30921 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:55 -0300] "GET /dataset.xhtml;jsessionid=0214dc416f0afe817703dcf8337e?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=Restricted&fileTag=&fileSortField=size&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30864 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:57 -0300] "GET /dataset.xhtml;jsessionid=0214dc416f0afe817703dcf8337e?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileTypeGroupFacet=&fileAccess=&fileSortField=date&tagPresort=false HTTP/1.1" 200 30947 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
+20.171.206.17 - - [01/Sep/2024:23:59:59 -0300] "GET /dataset.xhtml;jsessionid=0214dc416f0afe817703dcf8337e?persistentId=doi%3A10.48331%2Fscielodata.S4BDSX&version=&q=&fileAccess=&fileTag=&fileSortField=size&fileSortOrder=&tagPresort=false&folderPresort=true HTTP/1.1" 200 30937 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)"
diff --git a/metrics/tests/fixtures/usage.log b/metrics/tests/fixtures/usage.log
new file mode 100644
index 0000000..8c50b2b
--- /dev/null
+++ b/metrics/tests/fixtures/usage.log
@@ -0,0 +1,200 @@
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /css/screen/layout.css HTTP/1.1" 404 427 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /article.js HTTP/1.1" 303 8231 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+192.168.1.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/fbpelogp.gif HTTP/1.1" 200 1353 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:50 -0300] "GET /xsl/pmc/v3.0/xml.css HTTP/1.1" 304 5766 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/grp1c.gif HTTP/1.1" 303 181 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:50 -0300] "GET /css/screen/general.css HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:50 -0300] "GET /css/screen/styles.css HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+89.155.115.203 - - [21/May/2021:11:30:37 -0300] "GET /scielo.php?script=sci_arttext&pid=S0102-69092018000300512 HTTP/1.1" 200 44995 "https://www.google.com/" "Mozilla/5.0 (iPhone; CPU iPhone OS 13_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/137.2.345735309 Mobile/15E148 Safari/604.1"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/artsrc.gif HTTP/1.1" 502 239 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/toc.gif HTTP/1.1" 200 164 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+192.168.2.155 - - [21/May/2021:23:58:50 -0300] "GET /img/en/prev.gif HTTP/1.1" 200 244 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/next.gif HTTP/1.1" 302 193 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "POST /img/en/author.gif HTTP/1.1" 204 219 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/20231:23:58:50 -0300] "GET /img/en/fbpelogp.gif HTTP/1.1" 301 1353 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+67.205.129.249 - - [21/May/2021:05:05:16 -0300] "GET /scielo.php?download&pid=S0102-86502014000700465&format=EndNote HTTP/1.1" 200 491 "http://www.scielo.br/scielo.php?script=sci_isoref&pid=S0102-86502014000700465&lng=en" "LOCKSS cache"
+176.88.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/home.gif HTTP/1.1" 200 190 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:50 -0300] "PATCH /img/en/toc.gif HTTP/1.1" 200 164 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/search.gif HTTP/1.1" 200 210 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/home.gif HTTP/1.1" 200 190 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/alpha.gif HTTP/1.1" 200 220 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" ")"
+176.88.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/fulltxt.gif HTTP/1.1" 200 643 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/iconStatistics.gif HTTP/1.1" 200 1052 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:50 -0300] "GET  HTTP/1.1" 200 288 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/iconPDFDocument.gif HTTP/1.1" 200 628 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/iconXMLDocument.gif HTTP/1.1" 200 652 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:50 -0300] "GET /img/en/fulltxt.gif HTTP/1.1" 200 643 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/lattescv-button.gif HTTP/1.1" 200 1041 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 200 578 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedOff.gif HTTP/1.1" 200 288 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedGoogleOn.gif HTTP/1.1" 200 641 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/grp1c.gif HTTP/1.1" 200 181 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedOff.gif HTTP/1.1" 200 262 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/css/responsive.css HTTP/1.1" 200 16842 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedGoogleOn.gif HTTP/1.1" 200 625 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/common/iconPermalink.gif HTTP/1.1" 200 382 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/common/icon-close.png HTTP/1.1" 200 3091 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/home.gif HTTP/1.1" 200 190 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/subject.gif HTTP/1.1" 200 229 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 200 578 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/revistas/ep/v30n1/a04tab01.gif HTTP/1.1" 200 47388 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/lattescv-button.gif HTTP/1.1" 200 1041 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/revistas/ep/v30n1/seta.gif HTTP/1.1" 200 164 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /article.js HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedOff.gif HTTP/1.1" 200 262 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/revistas/ep/v30n1/a04img01.gif HTTP/1.1" 200 115469 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/css/style.css HTTP/1.1" 200 14769 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /scielo.php?script=sci_arttext&pid=S1679-39512005000100007 HTTP/1.1" 200 37818 "http://www.google.com/" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fbpelogp.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 200 578 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt HTTP/1.1" 200 35262 "https://scholar.google.com.br/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/revistas/ep/v30n1/a04img02.gif HTTP/1.1" 200 108500 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/grp1c.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/search.gif HTTP/1.1" 200 210 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/revistas/ep/v30n1/a04img03.gif HTTP/1.1" 200 51911 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+45.146.0.2 - - [21/May/2021:23:58:51 -0300] "HEAD /scielo.php?script=sci_arttext&pid=S1984-82502011000200005 HTTP/1.1" 200 19267 "http://www.scielo.br:80/scielo.php',(CAST((CHR(113)||CHR(98)||CHR(113)||CHR(98)||CHR(113))||(SELECT (CASE WHEN (3633=3633) THEN 1 ELSE 0 END))::text||(CHR(113)||CHR(98)||CHR(98)||CHR(107)||CHR(113)) AS NUMERIC))-- iAjd" "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; fr-fr) AppleWebKit/125.5.5 (KHTML, like Gecko) Safari/125.11"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconStatistics.gif HTTP/1.1" 200 1052 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/e-mailt.gif HTTP/1.1" 200 586 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedGoogleOn.gif HTTP/1.1" 200 641 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/artsrc.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/toc.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen.css HTTP/1.1" 200 89 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/common/iconPermalink.gif HTTP/1.1" 200 382 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/prev.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedGoogleOn.gif HTTP/1.1" 200 625 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/home.gif HTTP/1.1" 200 190 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+177.17.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/pt/fbpelogp.gif HTTP/1.1" 200 1353 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1415-43662015000600534&lng=pt&tlng=pt" "Mozilla/5.0 (Linux; Android 10; SM-A105M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.17.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/pt/artsrc.gif HTTP/1.1" 200 270 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1415-43662015000600534&lng=pt&tlng=pt" "Mozilla/5.0 (Linux; Android 10; SM-A105M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+45.146.0.2 - - [21/May/2021:23:58:51 -0300] "GET /scielo.php?script=sci_arttext&pid=S1984-82502015000200317 HTTP/1.1" 200 15467 "http://www.scielo.br:80/scielo.php" "-2051 OR 6187=(SELECT UPPER(XMLType(CHR(60)||CHR(58)||CHR(113)||CHR(122)||CHR(98)||CHR(122)||CHR(113)||(SELECT (CASE WHEN (6187=6187) THEN 1 ELSE 0 END) FROM DUAL)||CHR(113)||CHR(112)||CHR(122)||CHR(98)||CHR(113)||CHR(62))) FROM DUAL)# Gheq"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /xsl/pmc/v3.0/xml.css HTTP/1.1" 200 5766 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/next.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+114.119.0.0 - - [21/May/2021:23:58:51 -0300] "GET /scielo.php?script=sci_abstract&pid=S0104-59702008000200001&lng=pt&nrm=iso&tlng=pt HTTP/1.1" 404 574 "-" "Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)"
+45.70.0.0 - - [21/May/2021:23:58:51 -0300] "GET /pdf/ld/v16n2/1518-7632-ld-16-02-00261.pdf HTTP/1.1" 200 937525 "https://www.google.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+211.218.0.2 - - [21/May/2021:23:58:51 -0300] "GET /img/revistas/rb/v48n6//0100-3984-rb-48-06-0345-gf02.jpg HTTP/1.1" 200 471319 "https://www.google.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/styles.css HTTP/1.1" 200 3572 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/layout.css HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/prev.gif HTTP/1.1" 200 244 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/artsrc.gif HTTP/1.1" 200 239 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/grp1c.gif HTTP/1.1" 200 181 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/next.gif HTTP/1.1" 200 193 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/alpha.gif HTTP/1.1" 200 220 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/next.gif HTTP/1.1" 200 193 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconXMLDocument.gif HTTP/1.1" 200 652 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/artsrc.gif HTTP/1.1" 200 239 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fbpelogp.gif HTTP/1.1" 200 1353 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/toc.gif HTTP/1.1" 200 164 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/prev.gif HTTP/1.1" 200 244 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/next.gif HTTP/1.1" 200 193 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+177.17.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/pt/grp1c.gif HTTP/1.1" 200 202 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1415-43662015000600534&lng=pt&tlng=pt" "Mozilla/5.0 (Linux; Android 10; SM-A105M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+157.38.0.1 - - [21/May/2021:23:58:51 -0300] "GET /scielo.php?script=sci_arttext&pid=S0103-50532011000300020 HTTP/1.1" 200 15815 "https://www.google.com/" "Mozilla/5.0 (Linux; Android 8.1.0; Redmi Note 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /css/screen.css HTTP/1.1" 200 89 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/author.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/general.css HTTP/1.1" 200 133 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/layout.css HTTP/1.1" 200 427 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/subject.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /applications/scielo-org/js/toolbox.js HTTP/1.1" 200 3653 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/search.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/home.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/alpha.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/styles.css HTTP/1.1" 200 3572 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconStatistics.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /article.js HTTP/1.1" 200 8231 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconPDFDocument.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconXMLDocument.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/author.gif HTTP/1.1" 200 219 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/js/modernizr.custom.js HTTP/1.1" 200 11295 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /applications/scielo-org/js/jquery-1.4.2.min.js HTTP/1.1" 200 72174 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/alpha.gif HTTP/1.1" 200 220 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/general.css HTTP/1.1" 200 133 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/layout.css HTTP/1.1" 200 427 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/styles.css HTTP/1.1" 200 3572 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fulltxt.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedOff.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedGoogleOn.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedOff.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedGoogleOn.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /google_metrics/get_h5_m5.php?issn=0104-6632&callback=jsonp1621652375255 HTTP/1.1" 200 155 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /article.js HTTP/1.1" 200 8231 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconPDFDocument.gif HTTP/1.1" 200 628 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fbpelogp.gif HTTP/1.1" 200 1353 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/grp1c.gif HTTP/1.1" 200 181 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/artsrc.gif HTTP/1.1" 200 239 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/toc.gif HTTP/1.1" 200 164 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/common/iconPermalink.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/common/icon-close.png HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/e-mailt.gif HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/expandd2.png HTTP/1.1" 200 1487 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/prev.gif HTTP/1.1" 200 244 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/collapsed2.png HTTP/1.1" 200 339 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/next.gif HTTP/1.1" 200 193 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/css/bootstrap.min.css HTTP/1.1" 200 106092 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/subject.gif HTTP/1.1" 200 229 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/fulltxt.gif HTTP/1.1" 200 643 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedOff.gif HTTP/1.1" 200 288 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconStatistics.gif HTTP/1.1" 200 1052 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/lattescv-button.gif HTTP/1.1" 200 1041 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fbpelogp.gif HTTP/1.1" 200 1353 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/toc.gif HTTP/1.1" 200 164 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/grp1c.gif HTTP/1.1" 200 181 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/artsrc.gif HTTP/1.1" 200 239 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/next.gif HTTP/1.1" 200 193 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/prev.gif HTTP/1.1" 200 244 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedOff.gif HTTP/1.1" 200 288 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconPDFDocument.gif HTTP/1.1" 200 628 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/toc.gif HTTP/1.1" 200 164 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/prev.gif HTTP/1.1" 200 244 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+201.182.0.1 - - [21/May/2021:23:58:51 -0300] "GET /scielo.php?pid=S1414-81452019000200211&script=sci_arttext&tlng=pt HTTP/1.1" 200 27425 "https://www.google.com/" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/common/icon-close.png HTTP/1.1" 200 3091 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/subject.gif HTTP/1.1" 200 229 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconStatistics.gif HTTP/1.1" 200 1052 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/home.gif HTTP/1.1" 200 190 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/search.gif HTTP/1.1" 200 210 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/author.gif HTTP/1.1" 200 219 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/alpha.gif HTTP/1.1" 200 220 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/collapsed2.png HTTP/1.1" 200 339 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/expandd2.png HTTP/1.1" 200 1487 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/js/jquery.1.9.1.min.js HTTP/1.1" 200 92630 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fulltxt.gif HTTP/1.1" 200 643 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/author.gif HTTP/1.1" 200 219 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/subject.gif HTTP/1.1" 200 229 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconXMLDocument.gif HTTP/1.1" 200 652 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/search.gif HTTP/1.1" 200 210 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/home.gif HTTP/1.1" 200 190 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/alpha.gif HTTP/1.1" 200 220 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconStatistics.gif HTTP/1.1" 200 1052 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/fulltxt.gif HTTP/1.1" 200 643 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+177.52.0.1 - - [21/May/2021:23:58:51 -0300] "GET /google_metrics/get_h5_m5.php?issn=1517-9702&callback=jsonp1621652390876 HTTP/1.1" 200 155 "https://www.scielo.br/scielo.php?pid=S1517-97022004000100004&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconPDFDocument.gif HTTP/1.1" 200 628 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconXMLDocument.gif HTTP/1.1" 200 652 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fulltxt.gif HTTP/1.1" 200 643 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+190.232.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 200 578 "http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1809-29502015000300246&lang=es" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedOff.gif HTTP/1.1" 200 288 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 200 578 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/lattescv-button.gif HTTP/1.1" 200 1041 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/collapsed2.png HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+201.182.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen.css HTTP/1.1" 200 89 "https://www.scielo.br/scielo.php?pid=S1414-81452019000200211&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/expandd2.png HTTP/1.1" 304 0 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+174.249.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/alpha.gif HTTP/1.1" 200 220 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1806-37132018000200083" "Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/js/bootstrap.min.js HTTP/1.1" 200 28538 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+186.225.0.1 - - [21/May/2021:23:58:51 -0300] "GET /google_metrics/get_h5_m5.php?issn=1516-8484&callback=jsonp1621652389467 HTTP/1.1" 200 155 "https://www.scielo.br/scielo.php?pid=S1516-84842007000300007&script=sci_abstract&tlng=es" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedGoogleOn.gif HTTP/1.1" 200 641 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedGoogleOn.gif HTTP/1.1" 200 625 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedOff.gif HTTP/1.1" 200 262 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/fulltxt.gif HTTP/1.1" 200 643 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconXMLDocument.gif HTTP/1.1" 200 652 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconPDFDocument.gif HTTP/1.1" 200 628 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+201.182.0.1 - - [21/May/2021:23:58:51 -0300] "GET /xsl/pmc/v3.0/xml.css HTTP/1.1" 200 5766 "https://www.scielo.br/scielo.php?pid=S1414-81452019000200211&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/fulltxt.gif HTTP/1.1" 200 643 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/lattescv-button.gif HTTP/1.1" 200 1041 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconTranslation.gif HTTP/1.1" 200 578 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+176.88.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/fbpe/bjce/v17n4-7/a4f1.gif HTTP/1.1" 200 9521 "https://www.scielo.br/scielo.php?pid=S0104-66322000000400005&script=sci_arttext" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/common/iconPermalink.gif HTTP/1.1" 200 382 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+177.185.0.0 - - [21/May/2021:23:58:51 -0300] "GET /img/common/icon-close.png HTTP/1.1" 200 3091 "https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1679-39512005000100007" "Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedOff.gif HTTP/1.1" 200 288 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+201.182.0.1 - - [21/May/2021:23:58:51 -0300] "GET /css/screen/general.css HTTP/1.1" 200 133 "https://www.scielo.br/css/screen.css" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+201.182.0.1 - - [21/May/2021:23:58:51 -0300] "GET /article.js HTTP/1.1" 200 8231 "https://www.scielo.br/scielo.php?pid=S1414-81452019000200211&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+54.241.0.1 - - [21/May/2021:23:58:51 -0300] "GET / HTTP/1.1" 200 2935 "-" "Amazon-Route53-Health-Check-Service (ref 1261cdc1-a132-45b2-8c26-5de713c689cb; report http://amzn.to/1vsZADi)"
+191.240.0.2 - - [21/May/2021:23:58:51 -0300] "GET /xsl/plus/static/js/scielo-article.js HTTP/1.1" 200 4329 "https://www.scielo.br/scielo.php?pid=S1415-65552014000600874&script=sci_arttext_plus&tlng=pt" "Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconRelatedOff.gif HTTP/1.1" 200 262 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+138.97.0.1 - - [21/May/2021:23:58:51 -0300] "GET /img/en/iconCitedGoogleOn.gif HTTP/1.1" 200 641 "https://www.scielo.br/scielo.php?pid=S0100-40422017000700791&script=sci_arttext&tlng=pt" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
diff --git a/metrics/tests/fixtures/usage.scl.bunnynet.log b/metrics/tests/fixtures/usage.scl.bunnynet.log
new file mode 100644
index 0000000..620dd48
--- /dev/null
+++ b/metrics/tests/fixtures/usage.scl.bunnynet.log
@@ -0,0 +1,67 @@
+HIT|200|1757548785|9146|4339610|240e:3b0:a00e:10a3::|-|https://www.scielo.br/media/images/FAPESP.png|CN|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36|7caaeff65a64c5235f863868a7c94d69|CN
+HIT|200|1757548786|5432|4339610|186.225.0.1|-|https://www.scielo.br/j/neco/a/dqLRqnpmnncSmnzMCB8bzPG/|BR|Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36|8dbbeef65a64c5235f863868a7c94d70|BR
+MISS|200|1757548787|12345|4339610|177.52.0.1|https://www.google.com|https://www.scielo.br/j/psoc/a/hbSYnTbyNfzxcWT3FpXrL5G/?lang=es|BR|Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36|9eccfef65a64c5235f863868a7c94d71|BR
+HIT|200|1757548788|7890|4339610|200.144.0.1|-|https://www.scielo.br/j/rbz/a/cKnLLBn5NnshCX93Y6qYpHv/?format=pdf|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0|afdd0ef65a64c5235f863868a7c94d72|BR
+MISS 200 1755473649 29321 4339610 185.29.10.0 - http://www.scielo.br/j/rbb/a/qvkmfPDpQk4zZfSnWXJHrVQ/?lang=pt SE Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0 5dc9f2b1416a10af31321b8aad30b8f4 SE
+MISS|200|1755473648|26413|4339610|185.29.10.0|-|http://www.scielo.br/j/pab/a/xBnG6SmJRwz7Hzs8dVKNfqv/?lang=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|b9824b1192f138e1c09c6f1aceea92a0|SE
+MISS|200|1755473648|30693|4339610|185.29.10.0|-|http://www.scielo.br/j/cagro/a/z8d5Z5hGJxZgLRgQvcGLMdf/?lang=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|a544b893e26f389c7b1045246d756831|SE
+MISS|301|1755473648|1435|4339610|185.29.10.0|-|http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0100-84042002000300008&lng=pt&nrm=iso&tlng=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|d0b6cb231fafac81cf42c524d05e0882|SE
+MISS|301|1755473648|1448|4339610|185.29.10.0|-|http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0100-204X2005000400015&lng=pt&tlng=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|88fa5720e9fac38470a00ef6f8e6d35a|SE
+MISS|301|1755473647|1455|4339610|185.29.10.0|-|http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1413-70542010000100007&lng=pt&tlng=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|73c06380eecbdad38e094a3623592fc7|SE
+MISS|200|1755473644|37617|4339610|185.29.10.0|-|http://www.scielo.br/j/rbz/a/CKSH5K8T7x7Y84zMnSb7L4L/?lang=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|61d8e51d071e39d6b7b770ddf6406ae6|SE
+MISS|301|1755473643|1447|4339610|185.29.10.0|-|http://www.scielo.br/scielo.php?script=sci_arttext&pid=S1516-35982007001000002&lng=pt&tlng=pt|SE|Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0|74b6a623493442ba3535e0a374f67229|SE
+MISS|301|1755469323|1160|4339610|23.98.186.0|-|https://www.scielo.br/j/pcp/a/K4kBgKXqVW5HJt8WQT6chKd/|TX|Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot|f5ce4a53efe52ca927fe07ba147e2547|US
+MISS|404|1755469323|3206|4339610|23.98.186.0|-|https://www.scielo.br/j/dpjo/a/cpSn3rmDvrkMNTHj7bsPxgh|TX|Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot|63df76250a368cbf739b066379603c37|US
+HIT|200|1755387106|2362|4339610|116.5.172.0|-|https://www.scielo.br/media/images/BVS.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.43 Safari/537.36|42e0b299d66e93689d4233f961e421c1|CN
+HIT|200|1755387106|1396|4339610|116.5.172.0|-|https://www.scielo.br/static/img/logo-open-access.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.43 Safari/537.36|b217d8614d19b1975be34ce4dce17b9a|CN
+HIT|200|1755387106|116279|4339610|116.5.172.0|-|http://www.scielo.br/static/js/scielo-bundle-min.js|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.43 Safari/537.36|38f8e569d4db49213370cf57a13d2bcd|CN
+MISS|200|1755387105|35358|4339610|113.64.81.0|-|http://www.scielo.br/.bunny-shield/bd/bunnyprint.js|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.29 Safari/537.36|77d2570ea71505716432f7ee5971f3f2|CN
+HIT|200|1755387105|4237|4339610|113.64.81.0|-|https://www.scielo.br/media/images/BIREME.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.29 Safari/537.36|225052909be452a0d7820870b9eeee0c|CN
+HIT|200|1755387105|8818|4339610|113.64.81.0|-|https://www.scielo.br/media/images/FAPESP.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.29 Safari/537.36|0be807dc181599a8edd0018a0432453e|CN
+HIT|200|1755387105|6897|4339610|113.64.81.0|-|https://www.scielo.br/media/images/CAPES.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.29 Safari/537.36|f9a82944fa63468affa03834cba80575|CN
+HIT|200|1755387105|8602|4339610|113.64.81.0|-|https://www.scielo.br/media/images/CNPq.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.29 Safari/537.36|be79fbe4041ef8f959913833e962082c|CN
+HIT|200|1755387105|5085|4339610|113.64.81.0|-|https://www.scielo.br/media/images/FAP-UNIFESP.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.29 Safari/537.36|11889aa0e0b68b2e112ac86ccf6efc21|CN
+MISS|200|1755387228|263055|4339610|190.216.61.0|https://www.scielo.br/j/cadbto/a/Rj4pnrVyh3Pt9MnJ9pkNZtM/?format=pdf&lang=en|https://www.scielo.br/j/cadbto/a/Rj4pnrVyh3Pt9MnJ9pkNZtM/?format=pdf&lang=en|AR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36|510a20260a79caca62a837dc37eb883f|AR
+HIT|200|1757548785|9146|4339610|240e:3b0:a00e:10a3::|-|https://www.scielo.br/media/images/FAPESP.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|7caaeff65a64c5235f863868a7c94d69|CN
+HIT|200|1757548784|2886|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/logo-cnpq--dark.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|f95f87b22e0459457e2690a831b655b0|CN
+MISS|200|1757548784|1283|4339610|107.172.204.0|-|https://www.scielo.br/j/rdgv/i/2023.v19/|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36|c44b e4b3a0e28476cc7ecd85536dd5dc|US
+HIT|200|1757548783|7221|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/logo-bvs.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|fd3b2b560bd40042517330d996066cb4|CN
+HIT|200|1757548783|4566|4339610|240e:3b0:a00e:10a3::|-|https://www.scielo.br/media/images/BIREME.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|da6a4e9ad37b74579423c248c4504c72|CN
+HIT|200|1757548782|154545|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/logo-capes--dark.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|b8be8b7dc04b7282aa6a6ba039369c04|CN
+HIT|200|1757548782|7225|4339610|240e:3b0:a00e:10a3::|-|https://www.scielo.br/media/images/CAPES.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|5640aff5a6b6be98ffa42d4ec816ea5f|CN
+HIT|200|1757548782|8930|4339610|240e:3b0:a00e:10a3::|-|https://www.scielo.br/media/images/CNPq.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|56f0e22b76f07607da2a7299a0c0fd60|CN
+HIT|200|1757548782|2690|4339610|240e:3b0:a00e:10a3::|-|https://www.scielo.br/media/images/BVS.png|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|4afb166068f9c5ede0a3acd1dbe470ce|CN
+HIT|200|1757548781|15441|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/logo-bireme--dark.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|cb2c33fee71a102fdb2a6fd24e85405d|CN
+MISS|200|1757548781|13125|4339610|240e:3b0:7c11:e2ba::|-|http://www.scielo.br/|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.46 Safari/537.36|fe8f9167050f4a7fc499c9a74c7e884a|CN
+HIT|200|1757548780|194279|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/img-post-blog-scielo-exemplo.jpg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|658b76904d833f16bdc1ea74a202a759|CN
+HIT|200|1757548780|234005|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/logo-capes.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|818432b6ce34ce9c24c333dec8ffedfa|CN
+MISS|200|1757548779|1290|4339610|172.178.140.0|-|https://www.scielo.br/j/sdeb/a/Jbg5jB3yFMBQjnyJkcTfy3f/?format=pdf&lang=en|LA|Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0;+https://openai.com/bot|c02427cd6e5c12480d6497ed170b85e8|US
+HIT|200|1757548779|2624|4339610|240e:3b0:a00e:10a3::|-|http://www.scielo.br/static/img/logo-fapesp.svg|LA|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.27 Safari/537.36|aa8664ca05c405df632d1c78b5cdf2ec|CN
+HIT|206|1757548784|8472|4339610|2804:1b2:d144:5193::|-|https://www.scielo.br/static/img/favicon.ico|BR|WhatsApp/2.23.20.0|5aa2d7bc7ab139a391f03de133f0feac|BR
+HIT|206|1757548784|7816|4339610|2804:1b2:d144:5193::|-|https://www.scielo.br/media/images/pusf_glogo.gif|BR|WhatsApp/2.23.20.0|8114c288c6ffca53ba3dbe7c0bdf284d|BR
+MISS|200|1757548784|32087|4339610|2804:1b2:d144:5193::|-|https://www.scielo.br/j/pusf/a/8pqYN36tQsDYg8PMTqmRscs/?format=html&lang=pt|BR|WhatsApp/2.23.20.0|ba7f527cab609a10a6dd4d8bc2219f1b|BR
+HIT|200|1757548784|5686|4339610|201.87.253.0|https://www.scielo.br/j/rdbci/a/7qMcGcKQbBsqqNxyTQgJnyr/?lang=pt|https://www.scielo.br/media/images/rdbci_logo.png|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36|a79e6a873e65fd9edc8ecd5538fc44e2|BR
+MISS|200|1757548784|35214|4339610|201.87.253.0|-|https://www.scielo.br/j/rdbci/a/7qMcGcKQbBsqqNxyTQgJnyr/?lang=pt|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36|1fed1c099b06cb7673ef5de69b109f7b|BR
+HIT|200|1757548784|2898|4339610|2804:1b2:d144:5193::|https://www.scielo.br/j/pusf/a/8pqYN36tQsDYg8PMTqmRscs/?format=html&lang=pt|https://www.scielo.br/static/img/favicon.ico|BR|Mozilla/5.0 (iPhone; CPU iPhoneOS 18_6_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Mobile/15E148 Safari/604.1|8dbd7649b4b2784bafb98d8247d2a12a|BR
+MISS|301|1757548784|836|4339610|2804:214:8213:d647::|-|https://www.scielo.br/pdf/reeusp/v49n2/pt_0080-6234-reeusp-49-02-0261.pdf|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36|7b2e26c0e349bf7adc67162d9abd48b0|BR
+MISS|200|1757548783|4027|4339610|191.37.20.0|https://www.scielo.br/static/css/bootstrap.css?v=1.1.20|https://www.scielo.br/static/fonts/scielo-social-network.ttf?dhp6e8|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|b27531886c923c5f20d554a9028f5055|BR
+MISS|301|1757548783|1102|4339610|201.87.253.0|-|https://www.scielo.br/scielo.php?script=sci_arttext&pid=S1678-765X2025000100400&lang=pt|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36|fd8c3cb118c1c175474d680d07eec150|BR
+HIT|200|1757548783|4058|4339610|191.37.20.0|https://www.scielo.br/static/css/bootstrap.css?v=1.1.20|https://www.scielo.br/static/img/logo-scielo-no-label.svg|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|db9cbaa6b15abaec43f0c14f9c86a3c1|BR
+HIT|200|1757548783|6382|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/static/img/scimago.svg|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|be05de389aad37df3b85cf31c11bef32|BR
+MISS|200|1757548783|670|4339610|2804:29b8:5068:8edd::|https://www.scielo.br/j/sausoc/a/bsyjWnYqPyyJHnBYd5zrL5x/?format=html&lang=pt|https://www.scielo.br/.bunny-shield/bunnyprint/collect|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|c37c550b4f36633edeb894ba7a1586df|BR
+HIT|304|1757548782|790|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/media/images/FAPESP.png|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|ed3109ecc86838b8c23870605c2471e3|BR
+HIT|304|1757548782|786|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/media/images/BVS.png|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|5208750de8e07c0de695daf7f481a1a4|BR
+HIT|304|1757548782|789|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/media/images/CNPq.png|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|845fedf70d4a8f348f1f99bf67d1d4ef|BR
+HIT|304|1757548782|793|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/media/images/FAP-UNIFESP.png|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|321611726563c46cb2d2411cac9fa5f8|BR
+HIT|304|1757548782|789|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/media/images/BIREME.png|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|f29948a6125e436b09d7196d61b977b3|BR
+HIT|304|1757548782|788|4339610|187.92.246.0|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|https://www.scielo.br/media/images/CAPES.png|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|7b95a8b106b64f0ce2203c6105162699|BR
+HIT|200|1757548782|7807|4339610|2804:1b2:d144:5193::|-|https://www.scielo.br/media/images/pusf_glogo.gif|BR|NetworkingExtension/8621.3.11.10.3 Network/4277.140.33 iOS/18.6.2|3f7e286388473e03c302450f29217375|BR
+HIT|200|1757548782|2888|4339610|2804:1b2:d144:5193::|-|https://www.scielo.br/static/img/favicon.ico|BR|NetworkingExtension/8621.3.11.10.3 Network/4277.140.33 iOS/18.6.2|0069ba1a0d787b47ac27732c75cbffb7|BR
+MISS|200|1757548782|23087|4339610|187.92.246.0|https://www.google.com/|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/?format=html&lang=pt|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|615444ecda7ae663140a7c740be495fd|BR
+MISS|200|1757548782|32372|4339610|2804:29b8:5068:8edd::|https://www.scielo.br/j/sausoc/a/bsyjWnYqPyyJHnBYd5zrL5x/?format=html&lang=pt|https://www.scielo.br/.bunny-shield/bd/bunnyprint.js|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|b69221d23f7981991c114d52941d32ce|BR
+HIT|200|1757548782|10181|4339610|2804:29b8:5068:8edd::|https://www.scielo.br/j/sausoc/a/bsyjWnYqPyyJHnBYd5zrL5x/?format=html&lang=pt|https://www.scielo.br/media/images/sausoc_glogo.gif|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|4601f8d68bd57394d278ee1ea81a376f|BR
+MISS|200|1757548782|21786|4339610|2804:29b8:5068:8edd::|https://www.google.com/|https://www.scielo.br/j/sausoc/a/bsyjWnYqPyyJHnBYd5zrL5x/?format=html&lang=pt|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|6f31f80b57c8fc094b0907c05ab49c5e|BR
+HIT|200|1757548782|4066|4339610|143.107.252.0|https://www.scielo.br/static/css/bootstrap.css?v=1.1.20|https://www.scielo.br/static/img/logo-scielo-no-label.svg|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36|8791ce286667fad03c4e67eba215f450|BR
+MISS|200|1757548782|632|4339610|143.107.252.0|https://www.scielo.br/j/jvatitd/a/NGR7dTyggrnGBSxvQ6pdSJs/?lang=en|https://www.scielo.br/.bunny-shield/bunnyprint/collect|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36|4d31046b0f5d09b2c8aaf88281be4e08|BR
+HIT|200|1757548782|905|4339610|143.107.252.0|https://www.scielo.br/static/css/article.css?v=1.1.20|https://www.scielo.br/static/img/dashline.png|BR|Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36|0377aa19b04b0ff1b8b4f4589a07600f|BR
+MISS|301|1757548782|1142|4339610|187.92.246.0|https://www.google.com/|https://www.scielo.br/j/rbccv/a/qYJb4RL66h5Wpmg6X5KJ6Sm/|BR|Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Mobile Safari/537.36|33fde882099e2827cc7975e34f0f1e91|BR
\ No newline at end of file
diff --git a/metrics/tests/fixtures/user_agents.txt b/metrics/tests/fixtures/user_agents.txt
new file mode 100644
index 0000000..60561f0
--- /dev/null
+++ b/metrics/tests/fixtures/user_agents.txt
@@ -0,0 +1,13 @@
+"-2051 OR 6187=(SELECT UPPER(XMLType(CHR(60)||CHR(58)||CHR(113)||CHR(122)||CHR(98)||CHR(122)||CHR(113)||(SELECT (CASE WHEN (6187=6187) THEN 1 ELSE 0 END) FROM DUAL)||CHR(113)||CHR(112)||CHR(122)||CHR(98)||CHR(113)||CHR(62))) FROM DUAL)# Gheq"
+"Amazon-Route53-Health-Check-Service (ref 1261cdc1-a132-45b2-8c26-5de713c689cb; report http://amzn.to/1vsZADi)"
+"Mozilla/5.0 (iPhone; CPU iPhone OS 13_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/137.2.345735309 Mobile/15E148 Safari/604.1"
+"Mozilla/5.0 (Linux; Android 11; SM-N981U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.101 Mobile Safari/537.36"
+"Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)"
+"Mozilla/5.0 (Linux; Android 8.1.0; Redmi Note 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+"Mozilla/5.0 (Linux; Android 8.1.0; SM-J260MU) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+"Mozilla/5.0 (Linux; Android 9; SM-G9600) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36"
+"Mozilla/5.0 (Macintosh; U; PPC Mac OS X; fr-fr) AppleWebKit/125.5.5 (KHTML, like Gecko) Safari/125.11"
+"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
+"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36"
+"LOCKSS cache"
\ No newline at end of file
diff --git a/metrics/tests/integration/__init__.py b/metrics/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/integration/test_books_log_to_metrics.py b/metrics/tests/integration/test_books_log_to_metrics.py
new file mode 100644
index 0000000..b898a9b
--- /dev/null
+++ b/metrics/tests/integration/test_books_log_to_metrics.py
@@ -0,0 +1,206 @@
+"""Integration tests for turning the books fixture log into metrics documents."""
+
+import unittest
+from datetime import datetime
+from pathlib import Path
+
+from scielo_usage_counter import log_handler
+from scielo_usage_counter.translator.books import URLTranslatorBooksSite
+from scielo_usage_counter.url_translator import URLTranslationManager
+
+from metrics.counter.access import accumulation, extraction, validation
+from metrics.counter.indexing import converter as index_docs
+
+FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures"
+
+
+class TestBooksLogToMetrics(unittest.TestCase):
+    """Exercise parsing, URL translation, extraction and conversion for books."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Load the shared fixtures once for the whole class."""
+        super().setUpClass()
+        cls.robots_list = (FIXTURES_DIR / "counter-robots.txt").read_text().splitlines()
+        cls.mmdb_data = (FIXTURES_DIR / "map.mmdb").read_bytes()
+        cls.log_path = str(FIXTURES_DIR / "usage.books.log")
+        # No document or source metadata is handed to the translation manager.
+        cls.utm = URLTranslationManager(
+            documents_metadata=iter([]),
+            sources_metadata=iter([]),
+            translator=URLTranslatorBooksSite,
+        )
+
+    def _parse_log(self):
+        """Return the list of entries the parser yields for the books fixture."""
+        parser = log_handler.LogParser(
+            mmdb_data=self.mmdb_data,
+            robots_list=self.robots_list,
+            output_mode="dict",
+        )
+        parser.logfile = self.log_path
+        return list(parser.parse())
+
+    def _extract_all(self, lines):
+        """Return ``(counter_access, line)`` pairs for every translatable entry.
+
+        Entries with no URL, or whose translation is empty or not a dict, are
+        skipped.
+        """
+        extracted = []
+        for line in lines:
+            url = line.get("url")
+            if not url:
+                continue
+            translated = self.utm.translate(url)
+            if translated and isinstance(translated, dict):
+                counter_access = extraction.extract("books", translated)
+                extracted.append((counter_access, line))
+        return extracted
+
+    def test_parser_yields_lines_from_mixed_formats(self):
+        lines = self._parse_log()
+        self.assertGreater(len(lines), 0)
+
+    def test_translation_extracts_book_ids(self):
+        lines = self._parse_log()
+        extracted = self._extract_all(lines)
+        self.assertGreater(len(extracted), 0)
+
+        source_ids = {ca.get("source_id") for ca, _ in extracted}
+        # A lone ``None`` would satisfy the size check without any real id.
+        source_ids.discard(None)
+        self.assertGreater(len(source_ids), 0)
+        for ca, _ in extracted:
+            self.assertEqual(ca["source_type"], "book")
+            self.assertIsNotNone(ca.get("pid_generic"))
+
+    def test_extraction_produces_book_and_chapter_types(self):
+        lines = self._parse_log()
+        extracted = self._extract_all(lines)
+        doc_types = {ca.get("document_type") for ca, _ in extracted}
+        self.assertTrue(doc_types & {"book", "chapter"})
+
+    def test_resolves_country_codes_via_geoip(self):
+        lines = self._parse_log()
+        countries = {line.get("country_code") for line in lines}
+        countries.discard(None)
+        self.assertGreater(len(countries), 0)
+
+    def test_ipv6_address_is_parsed(self):
+        lines = self._parse_log()
+        has_ipv6 = any("::" in (line.get("ip_address") or "") for line in lines)
+        self.assertTrue(has_ipv6)
+
+    def test_pdf_and_epub_formats_detected(self):
+        lines = self._parse_log()
+        extracted = self._extract_all(lines)
+        formats = {ca.get("media_format") for ca, _ in extracted}
+        # Drop the "missing" marker so an all-None result cannot pass.
+        formats.discard(None)
+        self.assertGreater(len(formats), 0)
+
+    def test_full_pipeline_with_synthetic_metadata(self):
+        results = {}
+        counter_access = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "xjcw9",
+                "document_type": "book",
+                "book_id": "xjcw9",
+                "book_title": "Test Book",
+                "pid_generic": "book:xjcw9",
+                "title_pid_generic": "book:xjcw9",
+                "media_language": "pt",
+                "media_format": "html",
+                "content_type": "full_text",
+            },
+        )
+
+        valid, _ = validation.is_valid(counter_access)
+        self.assertTrue(valid)
+
+        accumulation.accumulate(
+            results,
+            counter_access,
+            {
+                "client_name": "browser",
+                "client_version": "1.0",
+                "ip_address": "186.215.90.179",
+                "country_code": "BR",
+                "local_datetime": datetime(2012, 4, 1, 0, 0, 29),
+            },
+        )
+
+        metrics = index_docs.convert(results)
+        self.assertGreater(len(metrics["month"]), 0)
+        self.assertGreater(len(metrics["year"]), 0)
+
+        # Both scopes must show up, each with its own data type.
+        expected_data_types = {"item": "Book_Segment", "title": "Book"}
+        seen_scopes = set()
+        for doc in metrics["month"].values():
+            counter = doc["counter"]
+            scope = counter["metric_scope"]
+            if scope in expected_data_types:
+                seen_scopes.add(scope)
+                self.assertEqual(counter["data_type"], expected_data_types[scope])
+        self.assertEqual(seen_scopes, set(expected_data_types))
+
+    def test_all_metric_fields_present_in_converted_document(self):
+        results = {}
+        counter_access = extraction.extract(
+            "books",
+            {
+                "source_type": "book",
+                "source_id": "h8pyf",
+                "document_type": "chapter",
+                "book_id": "h8pyf",
+                "chapter_id": "08",
+                "pid_generic": "book:h8pyf/chapter:08",
+                "title_pid_generic": "book:h8pyf",
+                "media_language": "pt",
+                "media_format": "html",
+                "content_type": "full_text",
+                "book_title": "Book H8PYF",
+                "chapter_title": "Chapter 08",
+            },
+        )
+        accumulation.accumulate(
+            results,
+            counter_access,
+            {
+                "client_name": "MSIE",
+                "client_version": "9.0",
+                "ip_address": "189.97.101.205",
+                "country_code": "BR",
+                "local_datetime": datetime(2012, 4, 1, 0, 30, 27),
+            },
+        )
+
+        metrics = index_docs.convert(results)
+        # Guard the loops below against passing on an empty conversion.
+        self.assertGreater(len(metrics["month"]), 0)
+        self.assertGreater(len(metrics["year"]), 0)
+
+        expected_fields = (
+            "total_requests",
+            "total_investigations",
+            "unique_requests",
+            "unique_investigations",
+            "collection",
+            "source",
+            "document",
+            "counter",
+            "access",
+            "daily_metrics",
+        )
+        for doc in metrics["month"].values():
+            for field in expected_fields:
+                self.assertIn(field, doc)
+
+        for doc in metrics["year"].values():
+            access = doc.get("access", {})
+            self.assertIn("year", access)
+            self.assertNotIn("daily_metrics", doc)
diff --git a/metrics/tests/integration/test_bunnynet_log_to_metrics.py b/metrics/tests/integration/test_bunnynet_log_to_metrics.py
new file mode 100644
index 0000000..084c831
--- /dev/null
+++ b/metrics/tests/integration/test_bunnynet_log_to_metrics.py
@@ -0,0 +1,53 @@
+"""Integration tests for parsing the bunnynet fixture log."""
+
+import unittest
+from pathlib import Path
+
+from scielo_usage_counter import log_handler
+
+FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures"
+
+
+class TestBunnynetLogToMetrics(unittest.TestCase):
+    """Check what the log parser yields for the bunnynet fixture log."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Load the shared fixtures once for the whole class."""
+        super().setUpClass()
+        cls.robots_list = (FIXTURES_DIR / "counter-robots.txt").read_text().splitlines()
+        cls.mmdb_data = (FIXTURES_DIR / "map.mmdb").read_bytes()
+        cls.log_path = str(FIXTURES_DIR / "usage.scl.bunnynet.log")
+
+    def _parse_log(self):
+        """Return ``(lines, stats)``: the parsed entries and the parser's stats."""
+        parser = log_handler.LogParser(
+            mmdb_data=self.mmdb_data,
+            robots_list=self.robots_list,
+            output_mode="dict",
+        )
+        parser.logfile = self.log_path
+        return list(parser.parse()), parser.stats
+
+    def test_parses_bunnynet_pipe_separated_format(self):
+        lines, _ = self._parse_log()
+        self.assertGreater(len(lines), 0)
+
+    def test_extracts_urls_from_bunnynet_format(self):
+        lines, _ = self._parse_log()
+        urls = [line.get("url") for line in lines if line.get("url")]
+        self.assertGreater(len(urls), 0)
+
+    def test_resolves_country_codes(self):
+        lines, _ = self._parse_log()
+        countries = {line.get("country_code") for line in lines}
+        countries.discard(None)
+        self.assertGreater(len(countries), 0)
+
+    def test_extracts_client_info(self):
+        lines, _ = self._parse_log()
+        # An empty parse would otherwise skip the loop and pass vacuously.
+        self.assertGreater(len(lines), 0)
+        for line in lines[:3]:
+            self.assertIn("client_name", line)
+            self.assertIn("ip_address", line)
diff --git a/metrics/tests/integration/test_classic_log_to_metrics.py b/metrics/tests/integration/test_classic_log_to_metrics.py
new file mode 100644
index 0000000..6480bc7
--- /dev/null
+++ b/metrics/tests/integration/test_classic_log_to_metrics.py
@@ -0,0 +1,127 @@
+"""Integration tests for turning the classic-site fixture log into metrics."""
+
+import logging
+import unittest
+from pathlib import Path
+
+from scielo_usage_counter import log_handler
+from scielo_usage_counter.translator.classic import URLTranslatorClassicSite
+from scielo_usage_counter.url_translator import URLTranslationManager
+
+from metrics.counter.access import accumulation, extraction, validation
+from metrics.counter.indexing import converter as index_docs
+
+FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures"
+
+logger = logging.getLogger(__name__)
+
+
+class TestClassicLogToMetrics(unittest.TestCase):
+    """Parse the classic fixture log and run it through the metrics steps."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Load the shared fixtures once for the whole class."""
+        super().setUpClass()
+        cls.robots_list = (FIXTURES_DIR / "counter-robots.txt").read_text().splitlines()
+        cls.mmdb_data = (FIXTURES_DIR / "map.mmdb").read_bytes()
+        cls.log_path = str(FIXTURES_DIR / "usage.log")
+        # No document or source metadata is handed to the translation manager.
+        cls.utm = URLTranslationManager(
+            documents_metadata=iter([]),
+            sources_metadata=iter([]),
+            translator=URLTranslatorClassicSite,
+        )
+
+    def _parse_log(self):
+        """Return ``(lines, stats)``: the parsed entries and the parser's stats."""
+        parser = log_handler.LogParser(
+            mmdb_data=self.mmdb_data,
+            robots_list=self.robots_list,
+            output_mode="dict",
+        )
+        parser.logfile = self.log_path
+        return list(parser.parse()), parser.stats
+
+    def _full_pipeline(self):
+        """Parse, translate, extract, validate and accumulate the fixture log.
+
+        Returns ``(results, lines, stats, valid_count)`` where ``valid_count``
+        is the number of entries that were accumulated without error.
+        """
+        lines, stats = self._parse_log()
+        results = {}
+        valid_count = 0
+
+        for line in lines:
+            url = line.get("url")
+            if not url:
+                continue
+
+            translated = self.utm.translate(url)
+            if not translated or not isinstance(translated, dict):
+                continue
+
+            counter_access = extraction.extract("scl", translated)
+            is_valid, _ = validation.is_valid(counter_access)
+            if not is_valid:
+                continue
+
+            try:
+                accumulation.accumulate(results, counter_access, line)
+            except Exception:
+                # Best effort: a line that cannot be accumulated is left out
+                # of the count, but the failure is logged rather than hidden.
+                logger.debug("accumulate() rejected %r", url, exc_info=True)
+            else:
+                valid_count += 1
+
+        return results, lines, stats, valid_count
+
+    def test_filters_static_resources(self):
+        lines, _ = self._parse_log()
+        self.assertLess(len(lines), 200)
+
+    def test_filters_bots(self):
+        lines, _ = self._parse_log()
+        for line in lines:
+            # Tolerate an explicit ``None`` client name before lower-casing.
+            client_name = (line.get("client_name") or "").lower()
+            self.assertNotEqual(client_name, "lockss cache")
+
+    def test_produces_article_type_metrics(self):
+        results, _, _, _ = self._full_pipeline()
+        if not results:
+            # skipTest() raises SkipTest, so nothing below runs in that case.
+            self.skipTest("No valid lines in classic fixture for this translator")
+
+        metrics = index_docs.convert(results)
+
+        for doc in metrics["month"].values():
+            self.assertEqual(doc["counter"]["data_type"], "Article")
+            self.assertEqual(doc["counter"]["metric_scope"], "item")
+            self.assertEqual(doc["document"]["type"], "article")
+
+    def test_sets_journal_parent_data_type(self):
+        results, _, _, _ = self._full_pipeline()
+        if not results:
+            self.skipTest("No valid lines")
+
+        metrics = index_docs.convert(results)
+        for doc in metrics["month"].values():
+            source_type = doc.get("source", {}).get("type")
+            if source_type == "journal":
+                self.assertEqual(doc["counter"]["parent_data_type"], "Journal")
+
+    def test_handles_truncated_user_agent(self):
+        lines, _ = self._parse_log()
+        self.assertGreater(len(lines), 0)
+
+    def test_valid_lines_produce_session_ids(self):
+        results, _, _, _ = self._full_pipeline()
+        if not results:
+            self.skipTest("No valid lines")
+
+        for value in results.values():
+            self.assertIn("user_session_id", value)
+            self.assertIsNotNone(value["user_session_id"])
diff --git a/metrics/tests/integration/test_pipelines.py b/metrics/tests/integration/test_pipelines.py
new file mode 100644
index 0000000..95e700f
--- /dev/null
+++ b/metrics/tests/integration/test_pipelines.py
@@ -0,0 +1,129 @@
+import unittest
+from datetime import datetime
+
+from scielo_usage_counter.values import (
+    CONTENT_TYPE_ABSTRACT,
+    CONTENT_TYPE_FULL_TEXT,
+    MEDIA_FORMAT_HTML,
+)
+
+from metrics.counter.access import accumulation, extraction
+from metrics.counter.indexing import converter as index_docs
+
+
+class TestPreprintPipeline(unittest.TestCase):
+    def _build_preprint_access(self, **overrides):
+        # Keyword overrides replace or extend the default fields listed here.
+        fields = {
+            "pid_generic": "10.1590/SciELOPreprints.1234",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+            "media_language": "en",
+            **overrides,
+        }
+        return extraction.extract("preprints", fields)
+
+    def test_extraction_sets_preprint_types(self):
+        extracted = self._build_preprint_access()
+        self.assertEqual(extracted["source_type"], "preprint_server")
+        self.assertEqual(extracted["document_type"], "preprint")
+        self.assertEqual(extracted["source_id"], "scielo-preprints")
+
+    def test_full_pipeline_produces_preprint_article_version(self):
+        accumulated = {}
+        log_line = {
+            "client_name": "browser",
+            "client_version": "1.0",
+            "ip_address": "200.1.2.3",
+            "country_code": "BR",
+            "local_datetime": datetime(2024, 6, 15, 14, 30, 10),
+        }
+        accumulation.accumulate(accumulated, self._build_preprint_access(), log_line)
+        month_metrics = index_docs.convert(accumulated)["month"]
+
+        month_docs = list(month_metrics.values())
+        self.assertEqual(len(month_docs), 1)
+        doc = month_docs[0]
+        self.assertEqual(doc["counter"]["data_type"], "Article")
+        self.assertEqual(doc["counter"]["article_version"], "Preprint")
+        self.assertEqual(doc["counter"]["metric_scope"], "item")
+        self.assertEqual(doc["document"]["type"], "preprint")
+        self.assertEqual(doc["document"]["id"], "10.1590/SCIELOPREPRINTS.1234")
+        self.assertEqual(doc["total_requests"], 1)
+        self.assertEqual(doc["unique_requests"], 1)
+
+
+class TestDataversePipeline(unittest.TestCase):
+    def _build_dataset_access(self, **overrides):
+        # Keyword overrides replace or extend the default fields listed here.
+        fields = {
+            "pid_generic": "10.48331/scielodata.abc123",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_ABSTRACT,
+            **overrides,
+        }
+        return extraction.extract("data", fields)
+
+    def test_extraction_sets_dataset_types(self):
+        extracted = self._build_dataset_access()
+        self.assertEqual(extracted["source_type"], "data_repository")
+        self.assertEqual(extracted["document_type"], "dataset")
+        self.assertEqual(extracted["source_id"], "scielo-data")
+
+    def test_full_pipeline_produces_dataset_metrics(self):
+        accumulated = {}
+        log_line = {
+            "client_name": "browser",
+            "client_version": "1.0",
+            "ip_address": "200.1.2.3",
+            "country_code": "BR",
+            "local_datetime": datetime(2024, 6, 15, 14, 30, 10),
+        }
+        accumulation.accumulate(accumulated, self._build_dataset_access(), log_line)
+        month_metrics = index_docs.convert(accumulated)["month"]
+
+        month_docs = list(month_metrics.values())
+        self.assertEqual(len(month_docs), 1)
+        doc = month_docs[0]
+        self.assertEqual(doc["counter"]["data_type"], "Dataset")
+        self.assertNotIn("article_version", doc["counter"])
+        self.assertEqual(doc["document"]["type"], "dataset")
+        self.assertEqual(doc["total_investigations"], 1)
+        self.assertEqual(doc["total_requests"], 0)
+
+
+class TestOPACPipeline(unittest.TestCase):
+    def test_opac_article_produces_journal_article_metrics(self):
+        """A full-text access on scl yields an Article doc with a Journal parent."""
+        article_fields = {
+            "scielo_issn": "1234-5678",
+            "pid_v3": "S1234-56782024000100001",
+            "article_title": "Test OPAC Article",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+            "media_language": "pt",
+            "journal_main_title": "Test Journal",
+            "journal_acronym": "testjou",
+            "journal_publisher_name": ["SciELO"],
+        }
+        counter_access = extraction.extract("scl", article_fields)
+
+        accumulated = {}
+        log_line = {
+            "client_name": "Chrome",
+            "client_version": "120.0",
+            "ip_address": "189.10.20.30",
+            "country_code": "BR",
+            "local_datetime": datetime(2024, 3, 20, 8, 15, 42),
+        }
+        accumulation.accumulate(accumulated, counter_access, log_line)
+        month_metrics = index_docs.convert(accumulated)["month"]
+
+        # Only the first month document is inspected.
+        doc = list(month_metrics.values())[0]
+        self.assertEqual(doc["counter"]["data_type"], "Article")
+        self.assertEqual(doc["counter"]["parent_data_type"], "Journal")
+        self.assertEqual(doc["document"]["type"], "article")
+        self.assertEqual(doc["source"]["type"], "journal")
+        self.assertEqual(doc["source"]["id"], "1234-5678")
+        self.assertEqual(doc["total_requests"], 1)
diff --git a/metrics/tests/opensearch/__init__.py b/metrics/tests/opensearch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/test_opensearch.py b/metrics/tests/opensearch/test_client.py
similarity index 64%
rename from metrics/tests/test_opensearch.py
rename to metrics/tests/opensearch/test_client.py
index 49e21b3..9eb7ebc 100644
--- a/metrics/tests/test_opensearch.py
+++ b/metrics/tests/opensearch/test_client.py
@@ -3,26 +3,33 @@
 
 from django.test import override_settings
 
-from metrics import opensearch
+from metrics.opensearch.client import OpenSearchUsageClient
+from metrics.opensearch.mappings import (
+    BOOKS_MONTH_INDEX_MAPPINGS,
+    BOOKS_YEAR_INDEX_MAPPINGS,
+    MONTH_INDEX_MAPPINGS,
+    YEAR_INDEX_MAPPINGS,
+    get_index_mappings,
+)
 
 
 class OpenSearchUsageClientTests(TestCase):
-    @patch.object(opensearch.OpenSearchUsageClient, "get_opensearch_client")
+    @patch.object(OpenSearchUsageClient, "get_opensearch_client")
     def test_create_index_sends_mappings_in_request_body(self, mock_get_client):
         mock_client = Mock()
         mock_get_client.return_value = mock_client
 
-        client = opensearch.OpenSearchUsageClient(url="https://example.org:9200")
+        client = OpenSearchUsageClient(url="https://example.org:9200")
         client.create_index(
             index_name="usage_monthly_books_202506",
-            mappings=opensearch.MONTH_INDEX_MAPPINGS,
+            mappings=MONTH_INDEX_MAPPINGS,
         )
 
         mock_client.indices.create.assert_called_once_with(
             index="usage_monthly_books_202506",
             body={
                 "settings": {"index": {"number_of_replicas": 0}},
-                "mappings": opensearch.MONTH_INDEX_MAPPINGS,
+                "mappings": MONTH_INDEX_MAPPINGS,
             },
         )
 
@@ -33,7 +40,7 @@ def test_create_index_sends_mappings_in_request_body(self, mock_get_client):
     )
     @patch("metrics.opensearch.client.OpenSearch")
     def test_verify_certs_false_explicitly_overrides_settings(self, mock_opensearch):
-        opensearch.OpenSearchUsageClient(
+        OpenSearchUsageClient(
             url="https://example.org:9200",
             verify_certs=False,
         )
@@ -45,23 +52,21 @@ def test_verify_certs_false_explicitly_overrides_settings(self, mock_opensearch)
 
     def test_get_index_mappings_returns_books_specific_mappings(self):
         self.assertIs(
-            opensearch.get_index_mappings("books", "month"),
-            opensearch.BOOKS_MONTH_INDEX_MAPPINGS,
+            get_index_mappings("books", "month"),
+            BOOKS_MONTH_INDEX_MAPPINGS,
         )
         self.assertIs(
-            opensearch.get_index_mappings("books", "year"),
-            opensearch.BOOKS_YEAR_INDEX_MAPPINGS,
-        )
-        self.assertIn("counter", opensearch.BOOKS_MONTH_INDEX_MAPPINGS["properties"])
-        self.assertIn("access", opensearch.BOOKS_YEAR_INDEX_MAPPINGS["properties"])
-        self.assertIn(
-            "applied_jobs", opensearch.BOOKS_MONTH_INDEX_MAPPINGS["properties"]
+            get_index_mappings("books", "year"),
+            BOOKS_YEAR_INDEX_MAPPINGS,
         )
+        self.assertIn("counter", BOOKS_MONTH_INDEX_MAPPINGS["properties"])
+        self.assertIn("access", BOOKS_YEAR_INDEX_MAPPINGS["properties"])
+        self.assertIn("applied_jobs", BOOKS_MONTH_INDEX_MAPPINGS["properties"])
         for mappings in (
-            opensearch.MONTH_INDEX_MAPPINGS,
-            opensearch.YEAR_INDEX_MAPPINGS,
-            opensearch.BOOKS_MONTH_INDEX_MAPPINGS,
-            opensearch.BOOKS_YEAR_INDEX_MAPPINGS,
+            MONTH_INDEX_MAPPINGS,
+            YEAR_INDEX_MAPPINGS,
+            BOOKS_MONTH_INDEX_MAPPINGS,
+            BOOKS_YEAR_INDEX_MAPPINGS,
         ):
             for removed_field in (
                 "document_type",
@@ -77,26 +82,27 @@ def test_get_index_mappings_returns_books_specific_mappings(self):
             ):
                 self.assertNotIn(removed_field, mappings["properties"])
             document_mapping = mappings["properties"]["document"]
+            source_mapping = mappings["properties"]["source"]
             self.assertEqual(document_mapping["properties"]["id"]["type"], "keyword")
             self.assertEqual(document_mapping["properties"]["title"]["type"], "text")
+            self.assertFalse(document_mapping["properties"]["title"]["index"])
+            self.assertEqual(source_mapping["properties"]["id"]["type"], "keyword")
+            self.assertEqual(source_mapping["properties"]["title"]["type"], "text")
+            self.assertFalse(source_mapping["properties"]["title"]["index"])
             self.assertEqual(
-                document_mapping["properties"]["title"]["fields"]["keyword"]["type"],
-                "keyword",
-            )
-            self.assertEqual(
-                mappings["properties"]["source"]["properties"]["id"]["type"],
-                "keyword",
+                source_mapping["properties"]["publisher_name"]["type"], "text"
             )
+            self.assertFalse(source_mapping["properties"]["publisher_name"]["index"])
 
     @patch("metrics.opensearch.client.helpers.bulk")
-    @patch.object(opensearch.OpenSearchUsageClient, "get_opensearch_client")
+    @patch.object(OpenSearchUsageClient, "get_opensearch_client")
     def test_increment_documents_for_daily_job_uses_applied_jobs(
         self,
         mock_get_client,
         mock_bulk,
     ):
         mock_get_client.return_value = Mock()
-        client = opensearch.OpenSearchUsageClient(url="https://example.org:9200")
+        client = OpenSearchUsageClient(url="https://example.org:9200")
 
         client.increment_documents_for_daily_job(
             index_name="usage_monthly_books_202506",
diff --git a/metrics/tests/opensearch/test_names.py b/metrics/tests/opensearch/test_names.py
new file mode 100644
index 0000000..f33dab1
--- /dev/null
+++ b/metrics/tests/opensearch/test_names.py
@@ -0,0 +1,23 @@
+import unittest
+
+from metrics.opensearch.names import generate_month_index_name, generate_year_index_name
+
+
+class TestIndexNames(unittest.TestCase):
+    def test_generate_index_names_for_year_and_month(self):
+        """Check yearly and monthly index names for scl and books.
+
+        The scl names are expected to end with the year of the access
+        date, while the books names are expected to carry no date suffix.
+        """
+        cases = (
+            (generate_year_index_name, "scl", "usage_yearly_scl_2024"),
+            (generate_month_index_name, "scl", "usage_monthly_scl_2024"),
+            (generate_year_index_name, "books", "usage_yearly_books"),
+            (generate_month_index_name, "books", "usage_monthly_books"),
+        )
+        for build_name, collection, expected in cases:
+            self.assertEqual(
+                build_name("usage", collection, "2024-01-15"),
+                expected,
+            )
diff --git a/metrics/tests/parsing/__init__.py b/metrics/tests/parsing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/parsing/test_environment.py b/metrics/tests/parsing/test_environment.py
new file mode 100644
index 0000000..b496779
--- /dev/null
+++ b/metrics/tests/parsing/test_environment.py
@@ -0,0 +1,39 @@
+from django.test import TestCase
+
+from metrics.services.parsing.environment import _get_translator_class
+
+
+class TranslatorClassTests(TestCase):
+    """Check which translator class _get_translator_class returns per name."""
+
+    def _assert_translator(self, name, expected_class_name):
+        """Assert that the class resolved for *name* has the expected name."""
+        self.assertEqual(_get_translator_class(name).__name__, expected_class_name)
+
+    def test_books_maps_to_books_translator(self):
+        self._assert_translator("books", "URLTranslatorBooksSite")
+
+    def test_classic_maps_to_classic_translator(self):
+        self._assert_translator("classic", "URLTranslatorClassicSite")
+
+    def test_opac_maps_to_opac_translator(self):
+        self._assert_translator("opac", "URLTranslatorOPACSite")
+
+    def test_opac_alpha_maps_to_opac_alpha_translator(self):
+        self._assert_translator("opac_alpha", "URLTranslatorOPACAlphaSite")
+
+    def test_preprints_maps_to_preprints_translator(self):
+        self._assert_translator("preprints", "URLTranslatorPreprintsSite")
+
+    def test_dataverse_maps_to_dataverse_translator(self):
+        self._assert_translator("dataverse", "URLTranslatorDataverseSite")
+
+    def test_unknown_name_returns_none(self):
+        self.assertIsNone(_get_translator_class("unknown"))
+
+    def test_none_returns_none(self):
+        self.assertIsNone(_get_translator_class(None))
+
+    def test_case_insensitive(self):
+        # Same expected class as the lowercase "books" lookup above.
+        self._assert_translator("Books", "URLTranslatorBooksSite")
diff --git a/metrics/tests/parsing/test_process_line.py b/metrics/tests/parsing/test_process_line.py
new file mode 100644
index 0000000..549d93d
--- /dev/null
+++ b/metrics/tests/parsing/test_process_line.py
@@ -0,0 +1,120 @@
+from datetime import date
+from unittest.mock import Mock
+
+from django.test import TestCase
+from scielo_usage_counter.values import CONTENT_TYPE_FULL_TEXT, MEDIA_FORMAT_HTML
+
+from collection.models import Collection
+from log_manager import choices
+from log_manager.models import LogFile
+from metrics.services.parsing.lines import process_line
+
+
+class ProcessLineTests(TestCase):
+    """Exercise process_line against a mocked translator and a queued log file."""
+
+    def setUp(self):
+        self.collection = Collection.objects.create(acron3="books", acron2="bk")
+        self.log_file = LogFile.objects.create(
+            hash="1" * 32,
+            path="/tmp/111.log.gz",
+            stat_result={},
+            status=choices.LOG_FILE_STATUS_QUEUED,
+            collection=self.collection,
+            date=date(2012, 3, 10),
+            validation={"probably_date": "2012-03-10"},
+        )
+
+    def _fake_utm(self, translate_return=None, translate_error=None):
+        # A truthy translate_error wins over any translate_return value.
+        translator = Mock()
+        if translate_error:
+            translator.translate.side_effect = translate_error
+            return translator
+        translator.translate.return_value = translate_return or {
+            "source_type": "book",
+            "source_id": "q7gtd",
+            "book_id": "q7gtd",
+            "pid_generic": "book:q7gtd",
+            "media_language": "en",
+            "media_format": MEDIA_FORMAT_HTML,
+            "content_type": CONTENT_TYPE_FULL_TEXT,
+        }
+        return translator
+
+    def _line(self, **overrides):
+        """Return a log-line dict; local_datetime stays None unless overridden."""
+        return {
+            "url": "/id/q7gtd",
+            "client_name": "browser",
+            "client_version": "1.0",
+            "ip_address": "127.0.0.1",
+            "country_code": "BR",
+            "local_datetime": None,
+            **overrides,
+        }
+
+    def test_discards_invalid_local_datetime_without_raising(self):
+        accumulated = {}
+        accepted, discarded = process_line(
+            results=accumulated,
+            line=self._line(),
+            utm=self._fake_utm(),
+            log_file=self.log_file,
+        )
+        self.assertFalse(accepted)
+        self.assertIsNone(discarded)
+        self.assertEqual(accumulated, {})
+
+    def test_url_translation_error_returns_false_none(self):
+        accepted, discarded = process_line(
+            results={},
+            line=self._line(),
+            utm=self._fake_utm(translate_error=ValueError("bad URL")),
+            log_file=self.log_file,
+        )
+        self.assertFalse(accepted)
+        self.assertIsNone(discarded)
+
+    def test_valid_line_accumulates_result(self):
+        from datetime import datetime
+
+        accumulated = {}
+        accepted, discarded = process_line(
+            results=accumulated,
+            line=self._line(local_datetime=datetime(2024, 1, 15, 10, 0, 5)),
+            utm=self._fake_utm(),
+            log_file=self.log_file,
+        )
+        self.assertTrue(accepted)
+        self.assertIsNone(discarded)
+        self.assertEqual(len(accumulated), 1)
+
+    def test_validation_failure_without_track_errors_returns_no_discarded_line(self):
+        blank_pid_utm = self._fake_utm(
+            translate_return={
+                "pid_generic": "",
+                "media_format": MEDIA_FORMAT_HTML,
+                "content_type": CONTENT_TYPE_FULL_TEXT,
+            }
+        )
+        accepted, discarded = process_line(
+            results={},
+            line=self._line(),
+            utm=blank_pid_utm,
+            log_file=self.log_file,
+            track_errors=False,
+        )
+        self.assertFalse(accepted)
+        self.assertIsNone(discarded)
+
+    def test_extraction_error_returns_false_none(self):
+        string_utm = self._fake_utm(translate_return="not-a-dict")
+        accepted, discarded = process_line(
+            results={},
+            line=self._line(),
+            utm=string_utm,
+            log_file=self.log_file,
+        )
+        self.assertFalse(accepted)
+        self.assertIsNone(discarded)
diff --git a/metrics/tests/services/__init__.py b/metrics/tests/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metrics/tests/test_cleanup.py b/metrics/tests/services/test_cleanup.py
similarity index 95%
rename from metrics/tests/test_cleanup.py
rename to metrics/tests/services/test_cleanup.py
index e08fa9c..4836cf8 100644
--- a/metrics/tests/test_cleanup.py
+++ b/metrics/tests/services/test_cleanup.py
@@ -115,9 +115,7 @@ def test_cleanup_skips_non_exported_jobs(self):
         paths = []
         for i, status in enumerate(statuses):
             access_date = date(2012, 3, 10 + i)
-            path = daily_payloads.build_daily_storage_path(
-                self.collection, access_date
-            )
+            path = daily_payloads.build_daily_storage_path(self.collection, access_date)
             resolved = self._write_payload_file(path)
             self._set_file_age(resolved, 30)
             paths.append(resolved)
@@ -254,9 +252,11 @@ def setUp(self):
         self.collection = Collection.objects.create(acron3="books", acron2="bk")
 
     def test_task_cleanup_daily_payloads_calls_service(self):
-        with patch("metrics.services.daily_payloads.cleanup_exported_payloads") as mock_cleanup:
+        with patch(
+            "metrics.services.daily_payloads.cleanup_exported_payloads"
+        ) as mock_cleanup:
             mock_cleanup.return_value = 5
-            from metrics.tasks import task_cleanup_daily_payloads
+            from metrics.tasks.cleanup import task_cleanup_daily_payloads
 
             result = task_cleanup_daily_payloads.run(
                 collections=["books"],
@@ -270,9 +270,11 @@ def test_task_cleanup_daily_payloads_calls_service(self):
         self.assertEqual(result, {"deleted_payloads": 5})
 
     def test_task_cleanup_with_defaults(self):
-        with patch("metrics.services.daily_payloads.cleanup_exported_payloads") as mock_cleanup:
+        with patch(
+            "metrics.services.daily_payloads.cleanup_exported_payloads"
+        ) as mock_cleanup:
             mock_cleanup.return_value = 0
-            from metrics.tasks import task_cleanup_daily_payloads
+            from metrics.tasks.cleanup import task_cleanup_daily_payloads
 
             result = task_cleanup_daily_payloads.run()
 
diff --git a/metrics/tests/test_daily_jobs.py b/metrics/tests/services/test_daily_jobs.py
similarity index 54%
rename from metrics/tests/test_daily_jobs.py
rename to metrics/tests/services/test_daily_jobs.py
index f31b410..0413ba6 100644
--- a/metrics/tests/test_daily_jobs.py
+++ b/metrics/tests/services/test_daily_jobs.py
@@ -1,15 +1,20 @@
 from datetime import date, timedelta
+from types import SimpleNamespace
+from unittest.mock import Mock, patch
 
-from django.contrib.auth import get_user_model
 from django.test import TestCase
 from django.utils import timezone
-from scielo_usage_counter.values import CONTENT_TYPE_FULL_TEXT, MEDIA_FORMAT_HTML
 
 from collection.models import Collection
 from log_manager import choices
 from log_manager.models import LogFile
 from metrics.models import DailyMetricJob
-from metrics import services
+from metrics.services.jobs import (
+    create_or_update_daily_metric_job,
+    mark_daily_metric_job_exported,
+    release_stale_daily_metric_jobs,
+)
+from metrics.services.parsing.job_payloads import build_daily_metric_job_payload
 
 
 class DailyMetricJobServiceTests(TestCase):
@@ -23,6 +28,7 @@ def _log_file(self, hash_value, status=choices.LOG_FILE_STATUS_QUEUED):
             stat_result={},
             status=status,
             collection=self.collection,
+            date=date(2012, 3, 10),
             validation={"probably_date": "2012-03-10"},
         )
 
@@ -39,7 +45,7 @@ def test_create_or_update_blocks_implicit_recompute_after_export(self):
         )
 
         with self.assertRaises(RuntimeError):
-            services.create_or_update_daily_metric_job(
+            create_or_update_daily_metric_job(
                 collection=self.collection,
                 access_date=date(2012, 3, 10),
                 log_files=[first, second],
@@ -57,7 +63,7 @@ def test_create_or_update_keeps_payload_for_export_retry(self):
             summary={"month_document_count": 1},
         )
 
-        services.create_or_update_daily_metric_job(
+        create_or_update_daily_metric_job(
             collection=self.collection,
             access_date=date(2012, 3, 10),
             log_files=[log_file],
@@ -69,7 +75,9 @@ def test_create_or_update_keeps_payload_for_export_retry(self):
         self.assertEqual(job.payload_hash, "abc")
         self.assertEqual(job.summary, {"month_document_count": 1})
 
-    def test_create_or_update_clears_stale_payload_when_inputs_change_before_success(self):
+    def test_create_or_update_clears_stale_payload_when_inputs_change_before_success(
+        self,
+    ):
         first = self._log_file("1" * 32)
         second = self._log_file("2" * 32)
         job = DailyMetricJob.objects.create(
@@ -82,7 +90,7 @@ def test_create_or_update_clears_stale_payload_when_inputs_change_before_success
             summary={"month_document_count": 1},
         )
 
-        services.create_or_update_daily_metric_job(
+        create_or_update_daily_metric_job(
             collection=self.collection,
             access_date=date(2012, 3, 10),
             log_files=[first, second],
@@ -104,59 +112,99 @@ def test_release_stale_daily_metric_jobs_marks_logs_for_retry(self):
             export_started_at=timezone.now() - timedelta(minutes=120),
         )
 
-        released = services.release_stale_daily_metric_jobs(stale_after_minutes=60)
+        released = release_stale_daily_metric_jobs(stale_after_minutes=60)
 
         log_file.refresh_from_db()
         self.assertEqual(released, 1)
         self.assertEqual(log_file.status, choices.LOG_FILE_STATUS_ERROR)
         self.assertIsNone(log_file.parse_heartbeat_at)
 
-    def test_process_line_discards_invalid_local_datetime_without_raising(self):
-        class FakeUtm:
-            def translate(self, url):
-                return {
-                    "book_id": "q7gtd",
-                    "pid_generic": "book:q7gtd",
-                    "media_language": "en",
-                    "media_format": MEDIA_FORMAT_HTML,
-                    "content_type": CONTENT_TYPE_FULL_TEXT,
-                }
-
-        log_file = self._log_file("1" * 32)
-        results = {}
-
-        is_valid, error = services.process_line(
-            results=results,
-            line={
-                "url": "/id/q7gtd",
-                "client_name": "browser",
-                "client_version": "1.0",
-                "ip_address": "127.0.0.1",
-                "country_code": "BR",
-                "local_datetime": None,
-            },
-            utm=FakeUtm(),
-            log_file=log_file,
+    def test_mark_daily_metric_job_exported_sets_status_and_timestamp(self):
+        job = DailyMetricJob.objects.create(
+            collection=self.collection,
+            access_date=date(2012, 3, 10),
+            status=DailyMetricJob.STATUS_EXPORTING,
         )
 
-        self.assertFalse(is_valid)
-        self.assertIsNone(error)
-        self.assertEqual(results, {})
+        mark_daily_metric_job_exported(job)
 
-    def test_mark_daily_metric_job_exported_records_updated_by(self):
-        user = get_user_model().objects.create_user(
-            username="tester",
-            email="tester@example.org",
-            password="secret",
-        )
+        job.refresh_from_db()
+        self.assertEqual(job.status, DailyMetricJob.STATUS_EXPORTED)
+        self.assertIsNotNone(job.exported_at)
+
+    @patch(
+        "metrics.services.parsing.job_payloads.daily_payloads.write_payload",
+        return_value="payload-hash",
+    )
+    @patch(
+        "metrics.services.parsing.job_payloads.index_docs.convert",
+        return_value={"month": {}, "year": {}},
+    )
+    @patch(
+        "metrics.services.parsing.job_payloads.process_line", return_value=(True, None)
+    )
+    @patch("metrics.services.parsing.job_payloads.setup_parsing_environment")
+    def test_build_daily_metric_job_payload_uses_only_input_log_hashes(
+        self,
+        mock_setup_parsing_environment,
+        mock_process_line,
+        mock_convert_documents,
+        mock_write_payload,
+    ):
+        selected = self._log_file("1" * 32)
+        extra = self._log_file("2" * 32)
         job = DailyMetricJob.objects.create(
             collection=self.collection,
             access_date=date(2012, 3, 10),
             status=DailyMetricJob.STATUS_EXPORTING,
+            input_log_hashes=[selected.hash],
         )
 
-        services.mark_daily_metric_job_exported(job, user=user)
+        parser = Mock()
+        parser.stats = SimpleNamespace(lines_parsed=1)
+        parser.parse.return_value = [{"url": "/selected"}]
+        mock_setup_parsing_environment.return_value = (parser, Mock())
+
+        payload = build_daily_metric_job_payload(
+            job, robots_list=["robot"], mmdb=Mock(data={})
+        )
 
+        selected.refresh_from_db()
+        extra.refresh_from_db()
         job.refresh_from_db()
-        self.assertEqual(job.status, DailyMetricJob.STATUS_EXPORTED)
-        self.assertIsNotNone(job.exported_at)
+
+        self.assertEqual(payload["input_log_hashes"], [selected.hash])
+        self.assertEqual(job.input_log_hashes, [selected.hash])
+        self.assertEqual(selected.status, choices.LOG_FILE_STATUS_PARSING)
+        self.assertEqual(extra.status, choices.LOG_FILE_STATUS_QUEUED)
+        mock_setup_parsing_environment.assert_called_once()
+        self.assertEqual(
+            mock_setup_parsing_environment.call_args.kwargs["log_file"].hash,
+            selected.hash,
+        )
+
+    def test_build_daily_metric_job_payload_rejects_empty_input_hashes(self):
+        """A job with no input log hashes is rejected with RuntimeError."""
+        hashless_job = DailyMetricJob.objects.create(
+            collection=self.collection,
+            access_date=date(2012, 3, 10),
+            status=DailyMetricJob.STATUS_EXPORTING,
+            input_log_hashes=[],
+        )
+        with self.assertRaisesMessage(RuntimeError, "has no input log hashes"):
+            build_daily_metric_job_payload(
+                hashless_job, robots_list=["robot"], mmdb=Mock(data={})
+            )
+
+    def test_build_daily_metric_job_payload_rejects_missing_input_hashes(self):
+        """A job listing a hash that no created LogFile carries raises RuntimeError."""
+        orphan_job = DailyMetricJob.objects.create(
+            collection=self.collection,
+            access_date=date(2012, 3, 10),
+            status=DailyMetricJob.STATUS_EXPORTING,
+            input_log_hashes=["9" * 32],
+        )
+        with self.assertRaisesMessage(RuntimeError, "is missing log files"):
+            build_daily_metric_job_payload(
+                orphan_job, robots_list=["robot"], mmdb=Mock(data={})
+            )
diff --git a/metrics/tests/test_tasks.py b/metrics/tests/services/test_tasks.py
similarity index 59%
rename from metrics/tests/test_tasks.py
rename to metrics/tests/services/test_tasks.py
index 5ffdaf0..abad24e 100644
--- a/metrics/tests/test_tasks.py
+++ b/metrics/tests/services/test_tasks.py
@@ -7,15 +7,21 @@
 from collection.models import Collection
 from log_manager import choices
 from log_manager.models import LogFile
-from metrics import tasks
 from metrics.models import DailyMetricJob
+from metrics.tasks.log_parsing import (
+    task_enqueue_log_parsing_jobs,
+    task_wait_log_parsing_wave,
+)
+from metrics.tasks.resume import task_resume_log_exports
 
 
 class ParseLogsTaskTests(TestCase):
     def setUp(self):
         self.collection = Collection.objects.create(acron3="books", acron2="bk")
 
-    def _log_file(self, hash_value, probably_date, status=choices.LOG_FILE_STATUS_QUEUED):
+    def _log_file(
+        self, hash_value, probably_date, status=choices.LOG_FILE_STATUS_QUEUED
+    ):
         return LogFile.objects.create(
             hash=hash_value,
             path=f"/tmp/{hash_value}.log.gz",
@@ -26,13 +32,17 @@ def _log_file(self, hash_value, probably_date, status=choices.LOG_FILE_STATUS_QU
             validation={"probably_date": probably_date},
         )
 
-    def test_task_parse_logs_enqueues_one_daily_job_per_collection_date(self):
+    def test_task_enqueue_log_parsing_jobs_enqueues_one_daily_job_per_collection_date(
+        self,
+    ):
         first = self._log_file("1" * 32, "2012-03-10")
         second = self._log_file("2" * 32, "2012-03-10")
         third = self._log_file("3" * 32, "2012-03-15")
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            result = tasks.task_parse_logs.run(
+        with patch(
+            "metrics.tasks.log_parsing.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_enqueue_log_parsing_jobs.run(
                 collections=["books"],
                 include_logs_with_error=False,
                 from_date="2012-03-01",
@@ -40,17 +50,24 @@ def test_task_parse_logs_enqueues_one_daily_job_per_collection_date(self):
             )
 
         self.assertEqual(result["enqueued_jobs"], 2)
+        self.assertEqual(result["enqueued_logs"], 3)
         self.assertEqual(mocked_apply_async.call_count, 2)
         jobs = list(DailyMetricJob.objects.order_by("access_date"))
-        self.assertEqual([job.access_date for job in jobs], [date(2012, 3, 10), date(2012, 3, 15)])
+        self.assertEqual(
+            [job.access_date for job in jobs], [date(2012, 3, 10), date(2012, 3, 15)]
+        )
         self.assertEqual(jobs[0].input_log_hashes, sorted([first.hash, second.hash]))
         self.assertEqual(jobs[1].input_log_hashes, [third.hash])
 
-    def test_task_parse_logs_allows_queue_override_and_robots_source(self):
+    def test_task_enqueue_log_parsing_jobs_allows_queue_override_and_robots_source(
+        self,
+    ):
         self._log_file("1" * 32, "2012-03-10")
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            tasks.task_parse_logs.run(
+        with patch(
+            "metrics.tasks.log_parsing.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            task_enqueue_log_parsing_jobs.run(
                 collections=["books"],
                 include_logs_with_error=False,
                 from_date="2012-03-01",
@@ -60,15 +77,46 @@ def test_task_parse_logs_allows_queue_override_and_robots_source(self):
             )
 
         mocked_apply_async.assert_called_once()
-        self.assertEqual(mocked_apply_async.call_args.kwargs["queue"], "parse_small_mult")
+        self.assertEqual(
+            mocked_apply_async.call_args.kwargs["queue"], "parse_small_mult"
+        )
         self.assertEqual(mocked_apply_async.call_args.kwargs["args"][-1], "counter")
 
-    def test_task_parse_logs_skip_log_hashes_prevents_reprocessing_same_auto_run(self):
-        skipped = self._log_file("1" * 32, "2012-03-10", status=choices.LOG_FILE_STATUS_ERROR)
-        queued = self._log_file("2" * 32, "2012-03-11")
+    def test_task_enqueue_log_parsing_jobs_excludes_error_logs_when_not_requested(self):
+        queued = self._log_file("1" * 32, "2012-03-10")
+        error = self._log_file(
+            "2" * 32, "2012-03-10", status=choices.LOG_FILE_STATUS_ERROR
+        )
+
+        with patch(
+            "metrics.tasks.log_parsing.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_enqueue_log_parsing_jobs.run(
+                collections=["books"],
+                include_logs_with_error=False,
+                from_date="2012-03-01",
+                until_date="2012-03-31",
+            )
+
+        mocked_apply_async.assert_called_once()
+        job = DailyMetricJob.objects.get()
+        self.assertEqual(job.input_log_hashes, [queued.hash])
+        self.assertNotIn(error.hash, job.input_log_hashes)
+        self.assertEqual(result["enqueued_logs"], 1)
+        self.assertEqual(result["enqueued_jobs"], 1)
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            result = tasks.task_parse_logs.run(
+    def test_task_enqueue_log_parsing_jobs_skip_log_hashes_prevents_reprocessing_same_auto_run(
+        self,
+    ):
+        skipped = self._log_file(
+            "1" * 32, "2012-03-10", status=choices.LOG_FILE_STATUS_ERROR
+        )
+        queued = self._log_file("2" * 32, "2012-03-10")
+
+        with patch(
+            "metrics.tasks.log_parsing.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_enqueue_log_parsing_jobs.run(
                 collections=["books"],
                 include_logs_with_error=True,
                 from_date="2012-03-01",
@@ -80,17 +128,45 @@ def test_task_parse_logs_skip_log_hashes_prevents_reprocessing_same_auto_run(sel
         job = DailyMetricJob.objects.get()
         self.assertEqual(job.input_log_hashes, [queued.hash])
         self.assertEqual(result["enqueued_jobs"], 1)
+        self.assertEqual(result["enqueued_logs"], 1)
 
-    def test_wait_parse_logs_wave_rechecks_until_daily_jobs_complete(self):
+    def test_task_enqueue_log_parsing_jobs_max_log_files_counts_files_not_jobs(self):
+        first = self._log_file("1" * 32, "2012-03-10")
+        second = self._log_file("2" * 32, "2012-03-10")
+
+        with patch(
+            "metrics.tasks.log_parsing.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_enqueue_log_parsing_jobs.run(
+                collections=["books"],
+                include_logs_with_error=False,
+                max_log_files=1,
+                from_date="2012-03-01",
+                until_date="2012-03-31",
+            )
+
+        mocked_apply_async.assert_called_once()
+        job = DailyMetricJob.objects.get()
+        self.assertEqual(job.input_log_hashes, [first.hash])
+        self.assertNotIn(second.hash, job.input_log_hashes)
+        self.assertEqual(result["enqueued_logs"], 1)
+        self.assertEqual(result["enqueued_jobs"], 1)
+        self.assertTrue(result["reached_max_log_files"])
+
+    def test_wait_log_parsing_wave_rechecks_until_daily_jobs_complete(self):
         job = DailyMetricJob.objects.create(
             collection=self.collection,
             access_date=date(2012, 3, 10),
             status=DailyMetricJob.STATUS_EXPORTING,
         )
 
-        with patch("metrics.tasks.task_wait_parse_logs_wave.apply_async") as mocked_wait_apply_async:
-            with patch("metrics.tasks.task_parse_logs.apply_async") as mocked_parse_logs_apply_async:
-                result = tasks.task_wait_parse_logs_wave.run(
+        with patch(
+            "metrics.tasks.log_parsing.task_wait_log_parsing_wave.apply_async"
+        ) as mocked_wait_apply_async:
+            with patch(
+                "metrics.tasks.log_parsing.task_enqueue_log_parsing_jobs.apply_async"
+            ) as mocked_parse_logs_apply_async:
+                result = task_wait_log_parsing_wave.run(
                     wave_log_hashes=[job.pk],
                     collections=["books"],
                     include_logs_with_error=False,
@@ -98,26 +174,34 @@ def test_wait_parse_logs_wave_rechecks_until_daily_jobs_complete(self):
                     auto_reexecute=True,
                 )
 
-        self.assertEqual(result, {"wave_completed": False, "reexecution_enqueued": False})
+        self.assertEqual(
+            result, {"wave_completed": False, "reexecution_enqueued": False}
+        )
         mocked_parse_logs_apply_async.assert_not_called()
         mocked_wait_apply_async.assert_called_once()
 
-    def test_wait_parse_logs_wave_preserves_queue_name(self):
+    def test_wait_log_parsing_wave_preserves_queue_name(self):
         job = DailyMetricJob.objects.create(
             collection=self.collection,
             access_date=date(2012, 3, 10),
             status=DailyMetricJob.STATUS_EXPORTING,
         )
 
-        with patch("metrics.tasks.task_wait_parse_logs_wave.apply_async") as mocked_wait_apply_async:
-            result = tasks.task_wait_parse_logs_wave.run(
+        with patch(
+            "metrics.tasks.log_parsing.task_wait_log_parsing_wave.apply_async"
+        ) as mocked_wait_apply_async:
+            result = task_wait_log_parsing_wave.run(
                 wave_log_hashes=[job.pk],
                 collections=["books"],
                 queue_name="parse_small",
             )
 
-        self.assertEqual(result, {"wave_completed": False, "reexecution_enqueued": False})
-        self.assertEqual(mocked_wait_apply_async.call_args.kwargs["queue"], "parse_small")
+        self.assertEqual(
+            result, {"wave_completed": False, "reexecution_enqueued": False}
+        )
+        self.assertEqual(
+            mocked_wait_apply_async.call_args.kwargs["queue"], "parse_small"
+        )
 
 
 class ResumeDailyMetricJobTests(TestCase):
@@ -140,8 +224,10 @@ def test_resume_log_exports_requeues_error_daily_jobs(self):
             input_log_hashes=[log_file.hash],
         )
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            result = tasks.task_resume_log_exports.run(
+        with patch(
+            "metrics.tasks.resume.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_resume_log_exports.run(
                 collections=["books"],
                 from_date="2012-03-01",
                 until_date="2012-03-31",
@@ -150,7 +236,9 @@ def test_resume_log_exports_requeues_error_daily_jobs(self):
 
         mocked_apply_async.assert_called_once()
         self.assertEqual(mocked_apply_async.call_args.kwargs["args"][0], job.pk)
-        self.assertEqual(mocked_apply_async.call_args.kwargs["queue"], "parse_small_mult")
+        self.assertEqual(
+            mocked_apply_async.call_args.kwargs["queue"], "parse_small_mult"
+        )
         self.assertEqual(result["resumed_logs"], 1)
 
     def test_resume_log_exports_clears_payload_when_current_logs_change(self):
@@ -172,8 +260,10 @@ def test_resume_log_exports_clears_payload_when_current_logs_change(self):
             summary={"month_document_count": 1},
         )
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async"):
-            tasks.task_resume_log_exports.run(
+        with patch(
+            "metrics.tasks.resume.task_build_and_export_daily_metric_job.apply_async"
+        ):
+            task_resume_log_exports.run(
                 collections=["books"],
                 from_date="2012-03-01",
                 until_date="2012-03-31",
@@ -204,8 +294,10 @@ def test_resume_log_exports_preserves_payload_when_current_logs_match(self):
             summary={"month_document_count": 1},
         )
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async"):
-            tasks.task_resume_log_exports.run(
+        with patch(
+            "metrics.tasks.resume.task_build_and_export_daily_metric_job.apply_async"
+        ):
+            task_resume_log_exports.run(
                 collections=["books"],
                 from_date="2012-03-01",
                 until_date="2012-03-31",
@@ -226,8 +318,10 @@ def test_resume_log_exports_requeues_stored_payload_without_current_logs(self):
             payload_hash="abc",
         )
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            result = tasks.task_resume_log_exports.run(
+        with patch(
+            "metrics.tasks.resume.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_resume_log_exports.run(
                 collections=["books"],
                 from_date="2012-03-01",
                 until_date="2012-03-31",
@@ -244,8 +338,10 @@ def test_resume_log_exports_skips_jobs_without_logs_or_payload(self):
             status=DailyMetricJob.STATUS_ERROR,
         )
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            result = tasks.task_resume_log_exports.run(
+        with patch(
+            "metrics.tasks.resume.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_resume_log_exports.run(
                 collections=["books"],
                 from_date="2012-03-01",
                 until_date="2012-03-31",
@@ -271,13 +367,15 @@ def test_resume_log_exports_releases_stale_exporting_jobs(self):
             export_started_at=timezone.now() - timedelta(minutes=120),
         )
 
-        with patch("metrics.tasks.task_process_daily_metric_job.apply_async") as mocked_apply_async:
-            result = tasks.task_resume_log_exports.run(
+        with patch(
+            "metrics.tasks.resume.task_build_and_export_daily_metric_job.apply_async"
+        ) as mocked_apply_async:
+            result = task_resume_log_exports.run(
                 collections=["books"],
                 from_date="2012-03-01",
                 until_date="2012-03-31",
                 stale_after_minutes=60,
-        )
+            )
 
         job.refresh_from_db()
         self.assertEqual(job.status, DailyMetricJob.STATUS_PENDING)
diff --git a/metrics/tests/test_index_utils.py b/metrics/tests/test_index_utils.py
deleted file mode 100644
index e0f0aef..0000000
--- a/metrics/tests/test_index_utils.py
+++ /dev/null
@@ -1,1000 +0,0 @@
-import csv
-import unittest
-from datetime import datetime
-from pathlib import Path
-from tempfile import TemporaryDirectory
-
-from scielo_usage_counter.values import (
-    CONTENT_TYPE_ABSTRACT,
-    CONTENT_TYPE_FULL_TEXT,
-    CONTENT_TYPE_UNDEFINED,
-    DEFAULT_SCIELO_ISSN,
-    MEDIA_FORMAT_HTML,
-    MEDIA_FORMAT_PDF,
-    MEDIA_FORMAT_UNDEFINED,
-)
-
-from metrics.counter import access
-from metrics.counter import documents as index_docs
-from metrics.opensearch.names import generate_month_index_name, generate_year_index_name
-
-
-class TestIndexUtils(unittest.TestCase):
-    def test_is_valid_item_access_data_valid(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertTrue(result)
-
-    def test_is_valid_item_access_data_missing_scielo_issn(self):
-        data = {
-            "scielo_issn": "",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertFalse(result)
-
-    def test_is_valid_item_access_data_valid_book_source(self):
-        data = {
-            "source_type": "book",
-            "source_id": "q7gtd",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_generic": "BOOK:Q7GTD",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertTrue(result)
-
-    def test_is_valid_item_access_data_undefined_media_format(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_UNDEFINED,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertFalse(result)
-
-    def test_is_valid_item_access_data_undefined_content_type(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_UNDEFINED,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertFalse(result)
-
-    def test_is_valid_item_access_data_missing_pid_v2_and_pid_v3(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "",
-            "pid_v3": "",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertFalse(result)
-
-    def test_is_valid_item_access_data_media_format_html(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertTrue(result)
-
-    def test_is_valid_item_access_data_content_type_abstract(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_ABSTRACT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertTrue(result)
-
-    def test_is_valid_item_access_data_dataset_without_source_or_language_is_valid(
-        self,
-    ):
-        data = {
-            "document_type": "dataset",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_v2": None,
-            "pid_v3": None,
-            "pid_generic": "DOI:10.48331/SCIELODATA.JLMAIY",
-            "media_language": "un",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_ABSTRACT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertTrue(result)
-
-    def test_is_valid_item_access_data_missing_media_language_is_invalid(self):
-        data = {
-            "scielo_issn": "1234-5678",
-            "pid_v2": "S0102-67202020000100001",
-            "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-            "media_language": "",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        result, _ = access.is_valid_item_access_data(data)
-        self.assertFalse(result)
-
-    def test_extract_item_access_data_normalizes_source_fields_for_journal(self):
-        data = access.extract_item_access_data(
-            "scl",
-            {
-                "scielo_issn": "1234-5678",
-                "pid_v2": "S0102-67202020000100001",
-                "media_language": "en",
-                "media_format": MEDIA_FORMAT_PDF,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "publication_year": "2024",
-                "journal_main_title": "Journal Title",
-                "journal_subject_area_capes": ["Health Sciences"],
-                "journal_subject_area_wos": ["Medicine"],
-                "journal_acronym": "testjou",
-                "journal_publisher_name": ["SciELO"],
-            },
-        )
-
-        self.assertEqual(data["source_type"], "journal")
-        self.assertEqual(data["source_id"], "1234-5678")
-        self.assertEqual(data["source_main_title"], "Journal Title")
-        self.assertEqual(data["source_acronym"], "testjou")
-
-    def test_extract_item_access_data_normalizes_source_fields_for_books(self):
-        data = access.extract_item_access_data(
-            "books",
-            {
-                "book_id": "q7gtd",
-                "book_title": "Book Title",
-                "title_pid_generic": "book:q7gtd",
-                "pid_generic": "book:q7gtd/chapter:03",
-                "media_language": "en",
-                "media_format": MEDIA_FORMAT_HTML,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "publication_year": "2023",
-            },
-        )
-
-        self.assertEqual(data["source_type"], "book")
-        self.assertEqual(data["source_id"], "q7gtd")
-        self.assertEqual(data["scielo_issn"], DEFAULT_SCIELO_ISSN)
-        self.assertEqual(data["source_main_title"], "Book Title")
-        self.assertEqual(data["title_pid_generic"], "BOOK:Q7GTD")
-
-    def test_extract_item_access_data_preserves_access_url_and_free_to_read(self):
-        data = access.extract_item_access_data(
-            "books",
-            {
-                "book_id": "c2248",
-                "book_title": "Book Title",
-                "title_pid_generic": "book:c2248",
-                "pid_generic": "book:c2248",
-                "media_language": "pt",
-                "media_format": MEDIA_FORMAT_PDF,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_url": "/id/c2248/pdf/freitas-9788599662830.pdf",
-                "source_access_type": "free_to_read",
-            },
-        )
-
-        self.assertEqual(data["access_url"], "/id/c2248/pdf/freitas-9788599662830.pdf")
-        self.assertEqual(data["counter_access_type"], "Free_To_Read")
-
-    def test_extract_item_access_data_tolerates_malformed_media_language(self):
-        data = access.extract_item_access_data(
-            "books",
-            {
-                "book_id": "q7gtd",
-                "pid_generic": "book:q7gtd",
-                "media_language": "'",
-                "media_format": MEDIA_FORMAT_HTML,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-            },
-        )
-
-        self.assertEqual(data["media_language"], "un")
-
-    def test_extract_item_access_data_sets_document_title_by_type(self):
-        chapter = access.extract_item_access_data(
-            "books",
-            {
-                "book_id": "q7gtd",
-                "chapter_id": "03",
-                "pid_generic": "book:q7gtd/chapter:03",
-                "book_title": "Book Title",
-                "chapter_title": "Chapter Title",
-                "media_format": MEDIA_FORMAT_HTML,
-                "media_language": "en",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-            },
-        )
-        book = access.extract_item_access_data(
-            "books",
-            {
-                "book_id": "q7gtd",
-                "pid_generic": "book:q7gtd",
-                "book_title": "Book Title",
-                "media_format": MEDIA_FORMAT_HTML,
-                "media_language": "en",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-            },
-        )
-        article = access.extract_item_access_data(
-            "scl",
-            {
-                "scielo_issn": "1234-5678",
-                "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-                "article_title": "Article Title",
-                "media_format": MEDIA_FORMAT_HTML,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-            },
-        )
-
-        self.assertEqual(chapter["document_title"], "Chapter Title")
-        self.assertEqual(book["document_title"], "Book Title")
-        self.assertEqual(article["document_title"], "Article Title")
-
-    def test_extract_item_access_data_normalizes_scielo_collection_document_types(self):
-        preprint = access.extract_item_access_data(
-            "preprints",
-            {
-                "pid_generic": "10.1590/SciELOPreprints.1234",
-                "media_format": MEDIA_FORMAT_HTML,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-            },
-        )
-        dataset = access.extract_item_access_data(
-            "data",
-            {
-                "pid_generic": "10.48331/scielodata.abc123",
-                "media_format": MEDIA_FORMAT_HTML,
-                "content_type": CONTENT_TYPE_ABSTRACT,
-            },
-        )
-        article = access.extract_item_access_data(
-            "scl",
-            {
-                "scielo_issn": "1234-5678",
-                "pid_v3": "jGJccQ7bFdbz6wy3nfXGVdv",
-                "media_format": MEDIA_FORMAT_HTML,
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-            },
-        )
-
-        self.assertEqual(preprint["source_type"], "preprint_server")
-        self.assertEqual(preprint["document_type"], "preprint")
-        self.assertEqual(dataset["source_type"], "data_repository")
-        self.assertEqual(dataset["document_type"], "dataset")
-        self.assertEqual(article["source_type"], "journal")
-        self.assertEqual(article["document_type"], "article")
-
-    def test_update_results_with_item_access_data_stores_source_and_periods(self):
-        results = {}
-        item_access_data = {
-            "collection": "books",
-            "source_type": "book",
-            "source_id": "q7gtd",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_v2": None,
-            "pid_v3": None,
-            "pid_generic": "BOOK:Q7GTD",
-            "title_pid_generic": "BOOK:Q7GTD",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-            "publication_year": "2023",
-            "document_title": "Book Title",
-            "source_main_title": "Book Title",
-            "source_subject_area_capes": [],
-            "source_subject_area_wos": [],
-            "source_acronym": None,
-            "source_publisher_name": ["SciELO Books"],
-        }
-        line = {
-            "client_name": "browser",
-            "client_version": "1.0",
-            "ip_address": "127.0.0.1",
-            "country_code": "BR",
-            "local_datetime": datetime(2024, 1, 15, 10, 0, 5),
-        }
-
-        access.update_results_with_item_access_data(results, item_access_data, line)
-
-        self.assertEqual(len(results), 1)
-        result = next(iter(results.values()))
-        self.assertEqual(result["source"]["source_type"], "book")
-        self.assertEqual(result["source"]["source_id"], "q7gtd")
-        self.assertEqual(result["source"]["main_title"], "Book Title")
-        self.assertEqual(result["access_date"], "2024-01-15")
-        self.assertEqual(result["access_month"], "202401")
-        self.assertEqual(result["access_year"], "2024")
-        self.assertEqual(result["access_country_code"], "BR")
-        self.assertEqual(result["content_language"], "en")
-        self.assertEqual(result["title_pid_generic"], "BOOK:Q7GTD")
-        self.assertEqual(result["document"], {"title": "Book Title"})
-        self.assertIn("user_session_id", result)
-
-    def test_update_results_with_item_access_data_rejects_invalid_local_datetime(self):
-        results = {}
-        item_access_data = {
-            "collection": "books",
-            "source_type": "book",
-            "source_id": "q7gtd",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_generic": "BOOK:Q7GTD",
-            "media_language": "en",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-        }
-        line = {
-            "client_name": "browser",
-            "client_version": "1.0",
-            "ip_address": "127.0.0.1",
-            "country_code": "BR",
-            "local_datetime": None,
-        }
-
-        with self.assertRaises(ValueError):
-            access.update_results_with_item_access_data(results, item_access_data, line)
-
-        self.assertEqual(results, {})
-
-    def test_update_results_with_item_access_data_does_not_expand_book_into_segments(
-        self,
-    ):
-        results = {}
-        item_access_data = {
-            "collection": "books",
-            "source_type": "book",
-            "source_id": "c2248",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_v2": None,
-            "pid_v3": None,
-            "pid_generic": "BOOK:C2248",
-            "title_pid_generic": "BOOK:C2248",
-            "segment_pid_generics": [
-                "BOOK:C2248/CHAPTER:00",
-                "BOOK:C2248/CHAPTER:01",
-                "BOOK:C2248/CHAPTER:02",
-            ],
-            "media_language": "pt",
-            "media_format": MEDIA_FORMAT_PDF,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-            "publication_year": "2018",
-            "source_main_title": "C2248 Book",
-        }
-        line = {
-            "client_name": "browser",
-            "client_version": "1.0",
-            "ip_address": "127.0.0.1",
-            "country_code": "BR",
-            "local_datetime": datetime(2024, 1, 15, 10, 0, 5),
-        }
-
-        access.update_results_with_item_access_data(results, item_access_data, line)
-
-        self.assertEqual(len(results), 1)
-        result = list(results.values())[0]
-        self.assertEqual(result["pid_generic"], "BOOK:C2248")
-
-    def test_double_click_filter_uses_url_bucket_for_same_item(self):
-        results = {}
-        item_access_data = {
-            "collection": "books",
-            "source_type": "book",
-            "source_id": "c2248",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_v2": None,
-            "pid_v3": None,
-            "pid_generic": "BOOK:C2248/CHAPTER:03",
-            "title_pid_generic": "BOOK:C2248",
-            "media_language": "pt",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-            "publication_year": "2018",
-            "source_main_title": "C2248 Book",
-        }
-        base_line = {
-            "client_name": "browser",
-            "client_version": "1.0",
-            "ip_address": "127.0.0.1",
-            "country_code": "BR",
-        }
-
-        access.update_results_with_item_access_data(
-            results,
-            item_access_data,
-            {
-                **base_line,
-                "local_datetime": datetime(2024, 1, 15, 10, 0, 5),
-                "url": "/id/c2248/03",
-            },
-        )
-        access.update_results_with_item_access_data(
-            results,
-            item_access_data,
-            {
-                **base_line,
-                "local_datetime": datetime(2024, 1, 15, 10, 0, 20),
-                "url": "https://books.scielo.org/id/c2248/epub/03.html?x=1",
-            },
-        )
-
-        raw = next(iter(results.values()))
-        self.assertEqual(
-            set(raw["click_timestamps_by_url"]),
-            {"/id/c2248/03", "/id/c2248/epub/03.html"},
-        )
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(results)
-        month_item = metrics_data["month"][
-            "books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"
-        ]
-
-        self.assertEqual(month_item["total_requests"], 2)
-        self.assertEqual(month_item["unique_requests"], 1)
-
-    def test_double_click_filter_collapses_same_url_within_30_seconds(self):
-        results = {}
-        item_access_data = {
-            "collection": "books",
-            "source_type": "book",
-            "source_id": "c2248",
-            "scielo_issn": DEFAULT_SCIELO_ISSN,
-            "pid_v2": None,
-            "pid_v3": None,
-            "pid_generic": "BOOK:C2248/CHAPTER:03",
-            "title_pid_generic": "BOOK:C2248",
-            "media_language": "pt",
-            "media_format": MEDIA_FORMAT_HTML,
-            "content_type": CONTENT_TYPE_FULL_TEXT,
-            "publication_year": "2018",
-            "source_main_title": "C2248 Book",
-        }
-        base_line = {
-            "client_name": "browser",
-            "client_version": "1.0",
-            "ip_address": "127.0.0.1",
-            "country_code": "BR",
-            "url": "/id/c2248/03?from=search",
-        }
-
-        access.update_results_with_item_access_data(
-            results,
-            item_access_data,
-            {**base_line, "local_datetime": datetime(2024, 1, 15, 10, 0, 5)},
-        )
-        access.update_results_with_item_access_data(
-            results,
-            item_access_data,
-            {**base_line, "local_datetime": datetime(2024, 1, 15, 10, 0, 20)},
-        )
-
-        raw = next(iter(results.values()))
-        self.assertEqual(
-            raw["click_timestamps_by_url"],
-            {"/id/c2248/03": {"00:05": 1, "00:20": 1}},
-        )
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(results)
-        month_item = metrics_data["month"][
-            "books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"
-        ]
-
-        self.assertEqual(month_item["total_requests"], 1)
-        self.assertEqual(month_item["unique_requests"], 1)
-
-    def test_generate_index_names_for_year_and_month(self):
-        self.assertEqual(
-            generate_year_index_name("usage", "scl", "2024-01-15"),
-            "usage_yearly_scl_2024",
-        )
-        self.assertEqual(
-            generate_month_index_name("usage", "scl", "2024-01-15"),
-            "usage_monthly_scl_2024",
-        )
-        self.assertEqual(
-            generate_year_index_name("usage", "books", "2024-01-15"),
-            "usage_yearly_books",
-        )
-        self.assertEqual(
-            generate_month_index_name("usage", "books", "2024-01-15"),
-            "usage_monthly_books",
-        )
-
-    def test_convert_raw_results_to_index_documents_creates_month_and_year_views(self):
-        data = {
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|browser|1.0|127.0.0.1|BR|en|html|full_text": {
-                "collection": "books",
-                "source_key": "q7gtd",
-                "document_type": "chapter",
-                "pid_v2": None,
-                "pid_v3": None,
-                "pid_generic": "BOOK:Q7GTD/CHAPTER:03",
-                "document": {"title": "Chapter Title"},
-                "title_pid_generic": "BOOK:Q7GTD",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:05": 1},
-                "access_country_code": "BR",
-                "content_language": "en",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_month": "202401",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "q7gtd",
-                    "scielo_issn": DEFAULT_SCIELO_ISSN,
-                    "main_title": "Book Title",
-                    "identifiers": {
-                        "book_id": "q7gtd",
-                        "isbn": "9788578791889",
-                    },
-                    "city": "Sao Paulo",
-                    "country": "BR",
-                    "subject_area_capes": [],
-                    "subject_area_wos": [],
-                    "acronym": None,
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2023",
-            }
-        }
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(data)
-
-        self.assertEqual(set(metrics_data.keys()), {"month", "year"})
-        self.assertEqual(len(metrics_data["month"]), 2)
-        self.assertEqual(len(metrics_data["year"]), 2)
-
-        month_item = metrics_data["month"][
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|2024-01|Open|Regular|2023"
-        ]
-        self.assertEqual(month_item["access"], {"month": "2024-01"})
-        self.assertIn("daily_metrics", month_item)
-        self.assertNotIn("by_day", month_item)
-        self.assertNotIn("access_country_code", month_item)
-        self.assertNotIn("content_language", month_item)
-        self.assertEqual(month_item["document"]["id"], "BOOK:Q7GTD/CHAPTER:03")
-        self.assertEqual(month_item["document"]["type"], "chapter")
-        self.assertEqual(month_item["document"]["title"], "Chapter Title")
-        self.assertEqual(month_item["document"]["parent_id"], "BOOK:Q7GTD")
-        self.assertEqual(month_item["document"]["publication_year"], "2023")
-        self.assertEqual(month_item["document"]["identifiers"]["book_id"], "q7gtd")
-        self.assertEqual(month_item["document"]["identifiers"]["chapter_id"], "03")
-        self.assertEqual(month_item["document"]["identifiers"]["isbn"], "9788578791889")
-        self.assertNotIn("pid_generic", month_item["document"]["identifiers"])
-        self.assertEqual(month_item["counter"]["metric_scope"], "item")
-        self.assertEqual(month_item["counter"]["data_type"], "Book_Segment")
-        self.assertEqual(month_item["total_requests"], 1)
-        self.assertEqual(month_item["unique_requests"], 1)
-        self.assertNotIn("scielo_issn", month_item["source"])
-        self.assertNotIn("book_id", month_item["source"]["identifiers"])
-        self.assertEqual(month_item["source"]["publisher_name"], ["SciELO Books"])
-
-        month_title = metrics_data["month"][
-            "title|books|q7gtd|||BOOK:Q7GTD|2024-01|Open|Regular|2023"
-        ]
-        self.assertEqual(month_title["document"]["id"], "BOOK:Q7GTD")
-        self.assertEqual(month_title["document"]["type"], "book")
-        self.assertEqual(month_title["document"]["title"], "Book Title")
-        self.assertNotIn("parent_id", month_title["document"])
-        self.assertEqual(month_title["counter"]["metric_scope"], "title")
-        self.assertEqual(month_title["counter"]["data_type"], "Book")
-        self.assertEqual(month_title["total_requests"], 1)
-        self.assertEqual(month_title["total_investigations"], 1)
-        self.assertEqual(month_title["unique_requests"], 1)
-        self.assertEqual(month_title["unique_investigations"], 1)
-
-        year_item = metrics_data["year"][
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:03|en|BR|2024|Open|Regular|2023"
-        ]
-        self.assertEqual(
-            year_item["access"],
-            {"year": "2024", "country_code": "BR", "content_language": "en"},
-        )
-        self.assertNotIn("daily_metrics", year_item)
-        self.assertNotIn("by_day", year_item)
-        self.assertNotIn("access_month", year_item)
-        self.assertEqual(year_item["document"]["title"], "Chapter Title")
-        self.assertEqual(year_item["counter"]["metric_scope"], "item")
-        self.assertEqual(year_item["total_requests"], 1)
-
-        year_title = metrics_data["year"][
-            "title|books|q7gtd|||BOOK:Q7GTD|en|BR|2024|Open|Regular|2023"
-        ]
-        self.assertEqual(year_title["counter"]["metric_scope"], "title")
-        self.assertEqual(year_title["document"]["title"], "Book Title")
-        self.assertNotIn("daily_metrics", year_title)
-        self.assertNotIn("by_day", year_title)
-        self.assertNotIn("access_month", year_title)
-        self.assertEqual(year_title["total_requests"], 1)
-        self.assertEqual(year_title["total_investigations"], 1)
-        self.assertEqual(year_title["unique_requests"], 1)
-        self.assertEqual(year_title["unique_investigations"], 1)
-
-    def test_convert_raw_results_to_index_documents_maps_counter_data_types(self):
-        data = {
-            "preprints|scielo-preprints|||10.1590/SCIELOPREPRINTS.1234|sess|BR|un|html|full_text": {
-                "collection": "preprints",
-                "source_key": "scielo-preprints",
-                "document_type": "preprint",
-                "pid_generic": "10.1590/SCIELOPREPRINTS.1234",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:05": 1},
-                "access_country_code": "BR",
-                "content_language": "un",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "preprint_server",
-                    "source_id": "scielo-preprints",
-                    "main_title": "SciELO Preprints",
-                },
-                "publication_year": "2024",
-            },
-            "data|scielo-data|||10.48331/SCIELODATA.ABC123|sess|BR|un|html|abstract": {
-                "collection": "data",
-                "source_key": "scielo-data",
-                "document_type": "dataset",
-                "pid_generic": "10.48331/SCIELODATA.ABC123",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:05": 1},
-                "access_country_code": "BR",
-                "content_language": "un",
-                "content_type": CONTENT_TYPE_ABSTRACT,
-                "access_date": "2024-01-15",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "data_repository",
-                    "source_id": "scielo-data",
-                    "main_title": "SciELO Data",
-                },
-                "publication_year": "2024",
-            },
-        }
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(data)
-        preprint_doc = metrics_data["month"][
-            "preprints|scielo-preprints|||10.1590/SCIELOPREPRINTS.1234|2024-01|Open|Regular|2024"
-        ]
-        dataset_doc = metrics_data["month"][
-            "data|scielo-data|||10.48331/SCIELODATA.ABC123|2024-01|Open|Regular|2024"
-        ]
-
-        self.assertEqual(preprint_doc["counter"]["data_type"], "Article")
-        self.assertEqual(preprint_doc["document"]["type"], "preprint")
-        self.assertEqual(preprint_doc["document"]["id"], "10.1590/SCIELOPREPRINTS.1234")
-        self.assertNotIn("pid_generic", preprint_doc["document"].get("identifiers", {}))
-        self.assertNotIn("scielo_document_type", preprint_doc)
-        self.assertEqual(preprint_doc["counter"]["article_version"], "Preprint")
-        self.assertEqual(dataset_doc["counter"]["data_type"], "Dataset")
-        self.assertNotIn("article_version", dataset_doc["counter"])
-
-    def test_convert_raw_results_to_index_documents_dedupes_book_unique_item_across_formats(
-        self,
-    ):
-        data = {
-            "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|html|full_text": {
-                "collection": "books",
-                "source_key": "c2248",
-                "document_type": "chapter",
-                "pid_v2": None,
-                "pid_v3": None,
-                "pid_generic": "BOOK:C2248/CHAPTER:03",
-                "title_pid_generic": "BOOK:C2248",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:05": 1},
-                "access_country_code": "BR",
-                "content_language": "pt",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_month": "202401",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "c2248",
-                    "main_title": "C2248 Book",
-                    "identifiers": {"book_id": "c2248", "isbn": "9788599662830"},
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2018",
-            },
-            "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|pdf|full_text": {
-                "collection": "books",
-                "source_key": "c2248",
-                "document_type": "chapter",
-                "pid_v2": None,
-                "pid_v3": None,
-                "pid_generic": "BOOK:C2248/CHAPTER:03",
-                "title_pid_generic": "BOOK:C2248",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:45": 1},
-                "access_country_code": "BR",
-                "content_language": "pt",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_month": "202401",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "c2248",
-                    "main_title": "C2248 Book",
-                    "identifiers": {"book_id": "c2248", "isbn": "9788599662830"},
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2018",
-            },
-        }
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(data)
-
-        month_item = metrics_data["month"][
-            "books|c2248|||BOOK:C2248/CHAPTER:03|2024-01|Open|Regular|2018"
-        ]
-        month_title = metrics_data["month"][
-            "title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"
-        ]
-
-        self.assertEqual(month_item["total_requests"], 2)
-        self.assertEqual(month_item["total_investigations"], 2)
-        self.assertEqual(month_item["unique_requests"], 1)
-        self.assertEqual(month_item["unique_investigations"], 1)
-        self.assertEqual(month_title["unique_requests"], 1)
-        self.assertEqual(month_title["unique_investigations"], 1)
-
-    def test_convert_raw_results_to_index_documents_skips_book_landing_page_from_item_scope(
-        self,
-    ):
-        data = {
-            "books|c2248|||BOOK:C2248|sess|BR|pt|html|abstract": {
-                "collection": "books",
-                "source_key": "c2248",
-                "document_type": "book",
-                "pid_v2": None,
-                "pid_v3": None,
-                "pid_generic": "BOOK:C2248",
-                "document": {"title": "C2248 Book"},
-                "title_pid_generic": "BOOK:C2248",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:05": 1},
-                "access_country_code": "BR",
-                "content_language": "pt",
-                "content_type": CONTENT_TYPE_ABSTRACT,
-                "access_date": "2024-01-15",
-                "access_month": "202401",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "c2248",
-                    "main_title": "C2248 Book",
-                    "identifiers": {"book_id": "c2248", "isbn": "9788599662830"},
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2018",
-            },
-        }
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(data)
-
-        self.assertEqual(
-            set(metrics_data["month"].keys()),
-            {"title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"},
-        )
-        self.assertEqual(
-            set(metrics_data["year"].keys()),
-            {"title|books|c2248|||BOOK:C2248|pt|BR|2024|Open|Regular|2018"},
-        )
-
-    def test_convert_raw_results_to_index_documents_counts_whole_book_without_segments_as_book_segment(
-        self,
-    ):
-        data = {
-            "books|c2248|||BOOK:C2248|sess|BR|pt|pdf|full_text": {
-                "collection": "books",
-                "source_key": "c2248",
-                "document_type": "book",
-                "pid_v2": None,
-                "pid_v3": None,
-                "pid_generic": "BOOK:C2248",
-                "document": {"title": "C2248 Book"},
-                "title_pid_generic": "BOOK:C2248",
-                "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                "click_timestamps": {"00:05": 1},
-                "access_country_code": "BR",
-                "content_language": "pt",
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_month": "202401",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "c2248",
-                    "main_title": "C2248 Book",
-                    "identifiers": {"book_id": "c2248"},
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2018",
-            },
-        }
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(data)
-        month_item = metrics_data["month"][
-            "books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"
-        ]
-        month_title = metrics_data["month"][
-            "title|books|c2248|||BOOK:C2248|2024-01|Open|Regular|2018"
-        ]
-
-        self.assertEqual(month_item["counter"]["data_type"], "Book_Segment")
-        self.assertEqual(month_item["counter"]["metric_scope"], "item")
-        self.assertEqual(month_item["document"]["id"], "BOOK:C2248")
-        self.assertEqual(month_item["document"]["title"], "C2248 Book")
-        self.assertNotIn("parent_id", month_item["document"])
-        self.assertEqual(month_title["counter"]["data_type"], "Book")
-        self.assertEqual(month_title["counter"]["metric_scope"], "title")
-        self.assertEqual(month_title["document"]["id"], "BOOK:C2248")
-        self.assertEqual(month_title["document"]["title"], "C2248 Book")
-
-    def test_convert_raw_results_aggregates_multiple_chapters_correctly(self):
-        """Test that accessing multiple chapters creates correct title-level totals"""
-        data = {
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:01|session1|BR|en|html|full_text": {
-                "collection": "books",
-                "source_key": "q7gtd",
-                "document_type": "chapter",
-                "pid_generic": "BOOK:Q7GTD/CHAPTER:01",
-                "title_pid_generic": "BOOK:Q7GTD",
-                "user_session_id": "session1",
-                "click_timestamps": {"00:05": 1},
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "q7gtd",
-                    "scielo_issn": DEFAULT_SCIELO_ISSN,
-                    "main_title": "Book Title",
-                    "identifiers": {"book_id": "q7gtd"},
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2023",
-            },
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:02|session1|BR|en|html|full_text": {
-                "collection": "books",
-                "source_key": "q7gtd",
-                "document_type": "chapter",
-                "pid_generic": "BOOK:Q7GTD/CHAPTER:02",
-                "title_pid_generic": "BOOK:Q7GTD",
-                "user_session_id": "session1",  # SAME SESSION
-                "click_timestamps": {"00:10": 1},
-                "content_type": CONTENT_TYPE_FULL_TEXT,
-                "access_date": "2024-01-15",
-                "access_year": "2024",
-                "source": {
-                    "source_type": "book",
-                    "source_id": "q7gtd",
-                    "scielo_issn": DEFAULT_SCIELO_ISSN,
-                    "main_title": "Book Title",
-                    "identifiers": {"book_id": "q7gtd"},
-                    "publisher_name": ["SciELO Books"],
-                },
-                "publication_year": "2023",
-            },
-        }
-
-        metrics_data = index_docs.convert_raw_results_to_index_documents(data)
-
-        # Should have 2 item documents (one per chapter) + 2 title documents (month and year)
-        self.assertEqual(len(metrics_data["month"]), 3)  # 2 items + 1 title
-        self.assertEqual(len(metrics_data["year"]), 3)  # 2 items + 1 title
-
-        # Each item should have total=1, unique=1
-        month_item_1 = metrics_data["month"][
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:01|2024-01|Open|Regular|2023"
-        ]
-        self.assertEqual(month_item_1["total_requests"], 1)
-        self.assertEqual(month_item_1["unique_requests"], 1)
-
-        month_item_2 = metrics_data["month"][
-            "books|q7gtd|||BOOK:Q7GTD/CHAPTER:02|2024-01|Open|Regular|2023"
-        ]
-        self.assertEqual(month_item_2["total_requests"], 1)
-        self.assertEqual(month_item_2["unique_requests"], 1)
-
-        # Title should have total=2 (sum of both chapters)
-        # Title unique should be 1 (same session accessed book, counted once)
-        month_title = metrics_data["month"][
-            "title|books|q7gtd|||BOOK:Q7GTD|2024-01|Open|Regular|2023"
-        ]
-        self.assertEqual(month_title["total_requests"], 2)
-        self.assertEqual(month_title["total_investigations"], 2)
-        self.assertEqual(month_title["unique_requests"], 1)
-        self.assertEqual(month_title["unique_investigations"], 1)
-
-    def test_export_book_r51_monthly_metrics_writes_counter_title_columns(self):
-        from metrics.management.commands.export_book_r51_monthly_metrics import Command
-
-        command = Command()
-        monthly_documents = command._build_monthly_documents(
-            {
-                "books|c2248|||BOOK:C2248/CHAPTER:03|sess|BR|pt|pdf|full_text": {
-                    "collection": "books",
-                    "source_key": "c2248",
-                    "document_type": "chapter",
-                    "pid_v2": None,
-                    "pid_v3": None,
-                    "pid_generic": "BOOK:C2248/CHAPTER:03",
-                    "title_pid_generic": "BOOK:C2248",
-                    "user_session_id": "browser|1.0|127.0.0.1|2024-01-15|10",
-                    "click_timestamps": {"00:05": 1},
-                    "access_country_code": "BR",
-                    "content_language": "pt",
-                    "content_type": CONTENT_TYPE_FULL_TEXT,
-                    "access_date": "2024-01-15",
-                    "access_year": "2024",
-                    "source": {
-                        "source_type": "book",
-                        "source_id": "c2248",
-                        "main_title": "C2248 Book",
-                        "identifiers": {"book_id": "c2248"},
-                        "publisher_name": ["SciELO Books"],
-                    },
-                    "publication_year": "2018",
-                }
-            }
-        )
-
-        with TemporaryDirectory() as tmpdir:
-            title_path = Path(tmpdir) / "title.csv"
-            command._write_title_csv(title_path, monthly_documents["title"])
-
-            with title_path.open(newline="") as fh:
-                reader = csv.DictReader(fh)
-                rows = list(reader)
-
-        self.assertEqual(
-            reader.fieldnames,
-            [
-                "year_month",
-                "title_pid_generic",
-                "document_type",
-                "total_item_requests",
-                "total_item_investigations",
-                "unique_title_requests",
-                "unique_title_investigations",
-            ],
-        )
-        self.assertNotIn("total_title_requests", reader.fieldnames)
-        self.assertEqual(rows[0]["year_month"], "2024-01")
-        self.assertEqual(rows[0]["total_item_requests"], "1")
-        self.assertEqual(rows[0]["unique_title_requests"], "1")
diff --git a/metrics/views.py b/metrics/views.py
deleted file mode 100755
index 91ea44a..0000000
--- a/metrics/views.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from django.shortcuts import render
-
-# Create your views here.
diff --git a/metrics/wagtail_hooks.py b/metrics/wagtail_hooks.py
index 94c2ffb..82b6d52 100644
--- a/metrics/wagtail_hooks.py
+++ b/metrics/wagtail_hooks.py
@@ -3,6 +3,7 @@
 
 from metrics.models import DailyMetricJob
 
+
 class DailyMetricJobSnippetViewSet(SnippetViewSet):
     model = DailyMetricJob
     menu_label = _("Daily Metric Jobs")
diff --git a/pytest.ini b/pytest.ini
index c2b3a23..e59c146 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,3 +1,4 @@
 [pytest]
 addopts = --ds=config.settings.test --reuse-db
 python_files = tests.py test_*.py
+norecursedirs = src
diff --git a/reports/models.py b/reports/models.py
index 3af1ec8..6311034 100644
--- a/reports/models.py
+++ b/reports/models.py
@@ -31,6 +31,7 @@ def pct_validated(self):
         if not self.total_files:
             return 0
         return round(self.validated_files / self.total_files * 100, 1)
+
     pct_validated.fget.short_description = _("% Valid Files")
 
     @property
@@ -38,6 +39,7 @@ def pct_valid_lines(self):
         if not self.lines_parsed:
             return 0
         return round(self.valid_lines / self.lines_parsed * 100, 1)
+
     pct_valid_lines.fget.short_description = _("% Valid Lines")
 
     @property
@@ -46,6 +48,7 @@ def pct_remote_ip(self):
         if not total:
             return 0
         return round(self.ip_remote_count / total * 100, 1)
+
     pct_remote_ip.fget.short_description = _("% Remote IP")
 
     def __str__(self):
diff --git a/reports/services/__init__.py b/reports/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/reports/services/dates.py b/reports/services/dates.py
new file mode 100644
index 0000000..01c425e
--- /dev/null
+++ b/reports/services/dates.py
@@ -0,0 +1,32 @@
+import re
+
+from core.utils import date_utils
+
+
+def get_report_date_from_log_file(log_file):
+    """Pick the date under which *log_file* is reported.
+
+    Sources are tried in order: ``log_file.date``, then the ``probably_date``
+    string stored in ``log_file.validation``, then a ``YYYY-MM-DD`` token found
+    in ``log_file.path``. The last two are converted with
+    ``date_utils.get_date_obj``; ``None`` is returned when no token is found.
+    """
+    if log_file.date:
+        return log_file.date
+
+    probable_date = (log_file.validation or {}).get("probably_date")
+    if not isinstance(probable_date, str) or not probable_date:
+        return _get_report_date_from_log_file_path(log_file.path)
+
+    return date_utils.get_date_obj(probable_date)
+
+
+def _get_report_date_from_log_file_path(path):
+    """Convert the first ``YYYY-MM-DD`` token in *path*; ``None`` if absent."""
+    try:
+        date_token = re.search(r"(\d{4}-\d{2}-\d{2})", path)
+    except TypeError:
+        # ``re.search`` rejects values it cannot scan, such as ``None``.
+        return None
+
+    return date_utils.get_date_obj(date_token.group(1)) if date_token else None
diff --git a/reports/services/emails.py b/reports/services/emails.py
new file mode 100644
index 0000000..9897458
--- /dev/null
+++ b/reports/services/emails.py
@@ -0,0 +1,205 @@
+import logging
+
+from django.conf import settings
+from django.core.mail import send_mail
+from django.utils.translation import gettext as _
+
+from collection.models import Collection
+from core.utils import date_utils
+from log_manager_config import models as lmc_models
+from reports.models import MonthlyLogReport
+
+
+def send_log_report_summary_emails(
+    collections=None,
+    from_date=None,
+    until_date=None,
+    days_to_go_back=None,
+):
+    """Email a log report summary to every requested collection.
+
+    Args:
+        collections: Collection ``acron3`` codes to notify. When falsy, the
+            codes come from ``Collection.acron3_list()``.
+        from_date: Forwarded to ``date_utils.get_date_range_str``.
+        until_date: Forwarded to ``date_utils.get_date_range_str``.
+        days_to_go_back: Forwarded to ``date_utils.get_date_range_str``.
+
+    Unknown collections are logged and skipped; collections for which no
+    message could be built are skipped silently.
+    """
+    from_date_str, until_date_str = date_utils.get_date_range_str(
+        from_date,
+        until_date,
+        days_to_go_back,
+    )
+    # Look up the constant template and interpolate afterwards: an f-string
+    # inside ``_()`` is already formatted when gettext receives it, so the
+    # catalog lookup can never match.
+    subject = _("Usage Log Report Summary ({from_date} to {until_date})").format(
+        from_date=from_date_str,
+        until_date=until_date_str,
+    )
+
+    for collection_acron in collections or Collection.acron3_list():
+        try:
+            collection = Collection.objects.get(acron3=collection_acron)
+        except Collection.DoesNotExist:
+            logging.warning("Collection not found: %s", collection_acron)
+            continue
+
+        message = _build_collection_log_report_email_message(
+            collection,
+            from_date_str,
+            until_date_str,
+        )
+
+        if not message:
+            continue
+
+        logging.info(
+            "Sending email to collection %s. Subject: %s.",
+            collection.main_name,
+            subject,
+        )
+
+        _send_log_report_email_to_collection(subject, message, collection_acron)
+
+
+def _build_collection_log_report_email_message(
+    collection,
+    from_date_str,
+    until_date_str,
+):
+    """Compose the plain-text email body for one collection.
+
+    The date strings are only printed in the header; the figures always come
+    from the collection's two most recent monthly reports. Returns ``""`` when
+    the collection has no monthly report.
+    """
+    monthly_reports = _list_latest_monthly_reports(collection)
+    if not monthly_reports:
+        return ""
+
+    latest_report = monthly_reports[0]
+    message = _build_current_month_report_section(
+        collection,
+        latest_report,
+        from_date_str,
+        until_date_str,
+    )
+
+    if len(monthly_reports) > 1:
+        previous_report = monthly_reports[1]
+        message += _build_previous_month_report_section(previous_report)
+        message += _build_month_over_month_report_section(
+            latest_report,
+            previous_report,
+        )
+
+    message += "\n---\nThis report is automatically generated by SciELO Usage.\n"
+    return message
+
+
+def _list_latest_monthly_reports(collection):
+    """Return up to two ``MonthlyLogReport`` rows for *collection*, newest first."""
+    return list(
+        MonthlyLogReport.objects.filter(
+            collection=collection,
+        ).order_by(
+            "-year", "-month"
+        )[:2]
+    )
+
+
+def _build_current_month_report_section(
+    collection,
+    report,
+    from_date_str,
+    until_date_str,
+):
+    """Render the header and the full statistics block for the latest *report*."""
+    # Constant msgid plus ``.format`` so the header can actually be translated.
+    message = _(
+        "Usage Log Report for {collection}\n"
+        "Period: {from_date} to {until_date}\n\n"
+    ).format(
+        collection=collection.acron3,
+        from_date=from_date_str,
+        until_date=until_date_str,
+    )
+    message += _("Latest month ({latest}):\n").format(latest=report.period_label)
+    message += (
+        f"  Total files: {report.total_files}\n"
+        f"  Validated files: {report.validated_files} ({report.pct_validated}%)\n"
+        f"  Invalidated files: {report.invalidated_files}\n"
+        f"  Errored files: {report.errored_files}\n"
+        f"  Lines parsed: {report.lines_parsed}\n"
+        f"  Valid lines: {report.valid_lines} ({report.pct_valid_lines}%)\n"
+        f"  Discarded lines: {report.discarded_lines}\n"
+        f"  Remote IPs: {report.ip_remote_count} ({report.pct_remote_ip}%)\n"
+        f"  Local IPs: {report.ip_local_count}\n"
+    )
+    return message
+
+
+def _build_previous_month_report_section(report):
+    """Render the condensed statistics block for the previous month's *report*."""
+    message = _("\nPrevious month ({prev}):\n").format(prev=report.period_label)
+    message += (
+        f"  Total files: {report.total_files}\n"
+        f"  Validated files: {report.validated_files} ({report.pct_validated}%)\n"
+        f"  Valid lines: {report.valid_lines} ({report.pct_valid_lines}%)\n"
+        f"  Remote IPs: {report.ip_remote_count} ({report.pct_remote_ip}%)\n"
+    )
+    return message
+
+
+def _build_month_over_month_report_section(latest_report, previous_report):
+    """Render file and line deltas between the two reports.
+
+    Returns ``""`` when the previous report counted no files.
+    """
+    if not previous_report.total_files:
+        return ""
+
+    file_diff = latest_report.total_files - previous_report.total_files
+    line_diff = latest_report.lines_parsed - previous_report.lines_parsed
+
+    message = _("\nMonth-over-month change:\n")
+    message += f"  Files: {file_diff:+d}\n"
+    message += f"  Lines: {line_diff:+d}\n"
+    return message
+
+
+def _send_log_report_email_to_collection(subject, message, collection_acron):
+    """Send *message* to the active ``CollectionEmail`` addresses of a collection.
+
+    A missing recipient list or a failing ``send_mail`` call is logged instead
+    of being raised to the caller.
+    """
+    emails = lmc_models.CollectionEmail.objects.filter(
+        config__collection__acron3=collection_acron,
+        active=True,
+    ).values_list("email", flat=True)
+
+    if not emails:
+        logging.error(
+            "Error. Please, add an E-mail Configuration for the collection %s.",
+            collection_acron,
+        )
+        return
+
+    try:
+        send_mail(
+            subject=subject,
+            message=message,
+            from_email=settings.DEFAULT_FROM_EMAIL,
+            recipient_list=list(emails),
+        )
+    except Exception as e:
+        logging.error(
+            "Error sending log files report for %s: %s",
+            collection_acron,
+            e,
+        )
diff --git a/reports/services/log_report.py b/reports/services/log_report.py
new file mode 100644
index 0000000..18dad6a
--- /dev/null
+++ b/reports/services/log_report.py
@@ -0,0 +1,164 @@
+import logging
+from collections import defaultdict
+
+from log_manager import choices
+from log_manager.models import LogFile
+from reports.models import MonthlyLogReport, WeeklyLogReport, YearlyLogReport
+from reports.services.dates import get_report_date_from_log_file
+
+VALIDATED_FILE_STATUSES = {
+    choices.LOG_FILE_STATUS_QUEUED,
+    choices.LOG_FILE_STATUS_PARSING,
+    choices.LOG_FILE_STATUS_PROCESSED,
+}
+
+
+def populate_log_report_tables(year=None, collection_acron=None):
+    """Rebuild the weekly, monthly and yearly report rows from the log files.
+
+    Returns a one-line summary string with the number of rows upserted per
+    period type.
+    """
+    totals_by_period = _build_log_report_totals_by_period(
+        year=year,
+        collection_acron=collection_acron,
+    )
+
+    # Upsert in the fixed weekly -> monthly -> yearly order.
+    weekly_count, monthly_count, yearly_count = (
+        _upsert_log_report_records(model_class, totals_by_period[period_name])
+        for model_class, period_name in (
+            (WeeklyLogReport, "weekly"),
+            (MonthlyLogReport, "monthly"),
+            (YearlyLogReport, "yearly"),
+        )
+    )
+
+    logging.info(
+        "Reports populated: %s weekly, %s monthly, %s yearly.",
+        weekly_count, monthly_count, yearly_count,
+    )
+
+    return f"Weekly: {weekly_count}, Monthly: {monthly_count}, Yearly: {yearly_count}"
+
+
+def _build_log_report_totals_by_period(year=None, collection_acron=None):
+    """Aggregate log-file totals per weekly, monthly and yearly period key."""
+    # Parse the optional year filter once instead of on every log file.
+    target_year = int(year) if year else None
+    totals_by_period = {
+        period_name: defaultdict(lambda: defaultdict(int))
+        for period_name in ("weekly", "monthly", "yearly")
+    }
+
+    for log_file in _iter_reportable_log_files(collection_acron=collection_acron):
+        report_date = get_report_date_from_log_file(log_file)
+        # Skip files with no resolvable date or outside the requested year.
+        if not report_date:
+            continue
+        if target_year is not None and report_date.year != target_year:
+            continue
+        _add_log_file_to_period_totals(totals_by_period, log_file, report_date)
+    return totals_by_period
+
+
+def _iter_reportable_log_files(collection_acron=None):
+    """Stream LogFile rows, optionally for one collection, in chunks of 2000."""
+    filters = {"collection__acron3": collection_acron} if collection_acron else {}
+    return (
+        LogFile.objects.select_related("collection")
+        .filter(**filters)
+        .only(
+            "id",
+            "collection_id",
+            "date",
+            "path",
+            "status",
+            "summary",
+            "validation",
+        )
+        .iterator(chunk_size=2000)
+    )
+
+
+def _add_log_file_to_period_totals(totals_by_period, log_file, report_date):
+    """Fold one log file into its weekly, monthly and yearly buckets."""
+    # ISO year/week can differ from the calendar year near year boundaries.
+    iso_year, iso_week = report_date.isocalendar()[:2]
+    keyed_periods = (
+        ("weekly", (log_file.collection_id, iso_year, iso_week)),
+        ("monthly", (log_file.collection_id, report_date.year, report_date.month)),
+        ("yearly", (log_file.collection_id, report_date.year)),
+    )
+    for period_name, period_key in keyed_periods:
+        totals = totals_by_period[period_name][period_key]
+        _add_log_file_metrics_to_totals(totals, log_file)
+
+
+def _add_log_file_metrics_to_totals(totals, log_file):
+    """Count one log file and add its status, line and IP figures to `totals`."""
+    totals["total_files"] += 1
+    _add_log_file_status_to_totals(totals, log_file.status)
+    _add_log_file_line_counts_to_totals(totals, log_file.summary or {})
+    _add_log_file_ip_counts_to_totals(totals, log_file.validation or {})
+
+
+def _add_log_file_status_to_totals(totals, status):
+    """Increment the per-status file counter that matches `status`, if any."""
+    if status == choices.LOG_FILE_STATUS_CREATED:
+        counter_name = "created_files"
+    elif status in VALIDATED_FILE_STATUSES:
+        counter_name = "validated_files"
+    elif status == choices.LOG_FILE_STATUS_INVALIDATED:
+        counter_name = "invalidated_files"
+    elif status == choices.LOG_FILE_STATUS_ERROR:
+        counter_name = "errored_files"
+    else:
+        # Unrecognized statuses do not touch any per-status counter.
+        return
+
+    totals[counter_name] += 1
+
+
+def _add_log_file_line_counts_to_totals(totals, summary):
+    """Add parsed, valid and discarded line counts from `summary` to `totals`."""
+    parsed_count = summary.get("lines_parsed") or 0
+    valid_count = summary.get("valid_lines") or 0
+    totals["lines_parsed"] += parsed_count
+    totals["valid_lines"] += valid_count
+    totals["discarded_lines"] += max(parsed_count - valid_count, 0)
+
+
+def _add_log_file_ip_counts_to_totals(totals, validation):
+    """Add the local/remote/unknown IP counts found in `validation` to `totals`."""
+    content_summary = (validation.get("content") or {}).get("summary") or {}
+    ip_counts = content_summary.get("ips") or {}  # null-safe at every level
+    for kind in ("local", "remote", "unknown"):
+        totals[f"ip_{kind}_count"] += ip_counts.get(kind) or 0
+
+
+def _upsert_log_report_records(model_class, totals_by_key):
+    """Create or update one report row per period key; return the row count."""
+    period_fields = _get_report_model_period_fields(model_class)
+
+    upserted = 0
+    for upserted, (period_key, totals) in enumerate(totals_by_key.items(), start=1):
+        lookup = _build_log_report_record_lookup(period_fields, period_key)
+        model_class.objects.update_or_create(defaults=totals, **lookup)
+
+    return upserted
+
+
+def _get_report_model_period_fields(model_class):
+    """Return the first unique_together tuple's fields, minus the leading one."""
+    return list(model_class._meta.unique_together[0])[1:]
+
+
+def _build_log_report_record_lookup(period_fields, period_key):
+    """Build ORM lookup kwargs from `(collection_id, *period_values)`."""
+    period_values = period_key[1:]
+    lookup = {"collection_id": period_key[0]}
+    lookup.update(
+        (name, period_values[pos]) for pos, name in enumerate(period_fields)
+    )
+    return lookup
diff --git a/reports/tasks.py b/reports/tasks.py
index 6a70048..81894e4 100644
--- a/reports/tasks.py
+++ b/reports/tasks.py
@@ -1,121 +1,13 @@
-import logging
-import re
-from collections import defaultdict
-
-from django.core.mail import send_mail
-from django.conf import settings
-from django.utils.translation import gettext as _
-
 from config import celery_app
-from core.utils import date_utils
-from collection.models import Collection
-from log_manager import choices
-from log_manager.models import LogFile
-from log_manager_config import models as lmc_models
-
-from reports.models import WeeklyLogReport, MonthlyLogReport, YearlyLogReport
-
-
-def _extract_date_from_log_file(lf):
-    if lf.date:
-        return lf.date
-
-    probably_date = (lf.validation or {}).get("probably_date")
-    if isinstance(probably_date, str) and probably_date:
-        return date_utils.get_date_obj(probably_date)
-
-    try:
-        match = re.search(r"(\d{4}-\d{2}-\d{2})", lf.path)
-        if match:
-            return date_utils.get_date_obj(match.group(1))
-    except Exception:
-        pass
-
-    return None
+from reports.services import emails, log_report
 
 
 @celery_app.task(bind=True, name="[Reports] Populate All Reports")
 def task_populate_all_reports(self, year=None, collection_acron=None):
-    qs = LogFile.objects.select_related("collection")
-    if collection_acron:
-        qs = qs.filter(collection__acron3=collection_acron)
-    qs = qs.only(
-        "id", "collection_id", "date", "path", "status", "summary", "validation"
-    )
-
-    weekly = defaultdict(lambda: defaultdict(int))
-    monthly = defaultdict(lambda: defaultdict(int))
-    yearly = defaultdict(lambda: defaultdict(int))
-
-    for lf in qs.iterator(chunk_size=2000):
-        extracted_date = _extract_date_from_log_file(lf)
-        if not extracted_date:
-            continue
-        if year and extracted_date.year != int(year):
-            continue
-
-        iso_year, iso_week, _ = extracted_date.isocalendar()
-        yr = extracted_date.year
-        mo = extracted_date.month
-
-        for agg, key in [
-            (weekly, (lf.collection_id, iso_year, iso_week)),
-            (monthly, (lf.collection_id, yr, mo)),
-            (yearly, (lf.collection_id, yr)),
-        ]:
-            r = agg[key]
-            r["total_files"] += 1
-            st = lf.status
-            if st == "CRE":
-                r["created_files"] += 1
-            elif st in ("QUE", "PAR", "PRO"):
-                r["validated_files"] += 1
-            elif st == "INV":
-                r["invalidated_files"] += 1
-            elif st == "ERR":
-                r["errored_files"] += 1
-
-            s = lf.summary or {}
-            lp = s.get("lines_parsed", 0) or 0
-            vl = s.get("valid_lines", 0) or 0
-            r["lines_parsed"] += lp
-            r["valid_lines"] += vl
-            r["discarded_lines"] += max(lp - vl, 0)
-
-            ips = (
-                (lf.validation or {})
-                .get("content", {})
-                .get("summary", {})
-                .get("ips", {})
-            )
-            r["ip_local_count"] += ips.get("local", 0) or 0
-            r["ip_remote_count"] += ips.get("remote", 0) or 0
-            r["ip_unknown_count"] += ips.get("unknown", 0) or 0
-
-    w_count = _upsert_reports(WeeklyLogReport, weekly)
-    m_count = _upsert_reports(MonthlyLogReport, monthly)
-    y_count = _upsert_reports(YearlyLogReport, yearly)
-
-    logging.info(
-        "Reports populated: %s weekly, %s monthly, %s yearly.",
-        w_count, m_count, y_count,
+    return log_report.populate_log_report_tables(
+        year=year,
+        collection_acron=collection_acron,
     )
-    return f"Weekly: {w_count}, Monthly: {m_count}, Yearly: {y_count}"
-
-
-def _upsert_reports(model_class, data):
-    count = 0
-    unique_fields = list(model_class._meta.unique_together[0])
-    period_fields = unique_fields[1:]
-    for key, fields in data.items():
-        coll_id = key[0]
-        period_values = key[1:]
-        lookup = {"collection_id": coll_id}
-        for idx, field_name in enumerate(period_fields):
-            lookup[field_name] = period_values[idx]
-        model_class.objects.update_or_create(defaults=fields, **lookup)
-        count += 1
-    return count
 
 
 @celery_app.task(
@@ -132,107 +24,9 @@ def task_log_files_count_status_report(
     user_id=None,
     username=None,
 ):
-    from_date_str, until_date_str = date_utils.get_date_range_str(
-        from_date, until_date, days_to_go_back
-    )
-    subject = _(
-        "Usage Log Report Summary "
-        f"({from_date_str} to {until_date_str})"
+    return emails.send_log_report_summary_emails(
+        collections=collections,
+        from_date=from_date,
+        until_date=until_date,
+        days_to_go_back=days_to_go_back,
     )
-
-    for collection_acron in (collections or Collection.acron3_list()):
-        try:
-            collection = Collection.objects.get(acron3=collection_acron)
-        except Collection.DoesNotExist:
-            logging.warning("Collection not found: %s", collection_acron)
-            continue
-
-        message = _build_report_message(
-            collection,
-            from_date_str,
-            until_date_str,
-        )
-
-        if not message:
-            continue
-
-        logging.info(
-            "Sending email to collection %s. Subject: %s.",
-            collection.main_name, subject,
-        )
-
-        _send_collection_email(subject, message, collection_acron)
-
-
-def _build_report_message(collection, from_date_str, until_date_str):
-    monthly = MonthlyLogReport.objects.filter(
-        collection=collection,
-    ).order_by("-year", "-month")
-
-    if not monthly.exists():
-        return ""
-
-    latest = monthly.first()
-    message = _(
-        f"Usage Log Report for {collection.acron3}\n"
-        f"Period: {from_date_str} to {until_date_str}\n\n"
-    )
-    message += _("Latest month ({latest}):\n").format(latest=latest.period_label)
-    message += (
-        f"  Total files: {latest.total_files}\n"
-        f"  Validated files: {latest.validated_files} ({latest.pct_validated}%)\n"
-        f"  Invalidated files: {latest.invalidated_files}\n"
-        f"  Errored files: {latest.errored_files}\n"
-        f"  Lines parsed: {latest.lines_parsed}\n"
-        f"  Valid lines: {latest.valid_lines} ({latest.pct_valid_lines}%)\n"
-        f"  Discarded lines: {latest.discarded_lines}\n"
-        f"  Remote IPs: {latest.ip_remote_count} ({latest.pct_remote_ip}%)\n"
-        f"  Local IPs: {latest.ip_local_count}\n"
-    )
-
-    prev_month = latest
-    if len(monthly) > 1:
-        prev_month = monthly[1]
-        message += _("\nPrevious month ({prev}):\n").format(prev=prev_month.period_label)
-        message += (
-            f"  Total files: {prev_month.total_files}\n"
-            f"  Validated files: {prev_month.validated_files} ({prev_month.pct_validated}%)\n"
-            f"  Valid lines: {prev_month.valid_lines} ({prev_month.pct_valid_lines}%)\n"
-            f"  Remote IPs: {prev_month.ip_remote_count} ({prev_month.pct_remote_ip}%)\n"
-        )
-
-        if prev_month.total_files:
-            file_diff = latest.total_files - prev_month.total_files
-            line_diff = latest.lines_parsed - prev_month.lines_parsed
-            message += _("\nMonth-over-month change:\n")
-            message += f"  Files: {file_diff:+d}\n"
-            message += f"  Lines: {line_diff:+d}\n"
-
-    message += (
-        f"\n---\n"
-        f"This report is automatically generated by SciELO Usage.\n"
-    )
-    return message
-
-
-def _send_collection_email(subject, message, collection):
-    emails = lmc_models.CollectionEmail.objects.filter(
-        config__collection__acron3=collection, active=True
-    ).values_list("email", flat=True)
-
-    if not emails:
-        logging.error(
-            "Error. Please, add an E-mail Configuration for the collection %s.",
-            collection,
-        )
-        return
-
-    try:
-        send_mail(
-            subject=subject,
-            message=message,
-            from_email=settings.DEFAULT_FROM_EMAIL,
-            recipient_list=list(emails),
-        )
-    except Exception as e:
-        logging.error("Error sending log files report for %s: %s", collection, e)
diff --git a/reports/tests/__init__.py b/reports/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/reports/tests/test_services.py b/reports/tests/test_services.py
new file mode 100644
index 0000000..34d25d4
--- /dev/null
+++ b/reports/tests/test_services.py
@@ -0,0 +1,70 @@
+from datetime import date
+
+from django.test import TestCase
+
+from collection.models import Collection
+from log_manager import choices
+from log_manager.models import LogFile
+from reports.services import dates, log_report
+
+
+class DateServiceTests(TestCase):
+    def test_get_report_date_from_log_file_uses_validation_probably_date(self):
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        log_file = LogFile(
+            collection=collection,
+            path="/tmp/access.log",
+            stat_result={},
+            hash="1" * 32,
+            status=choices.LOG_FILE_STATUS_CREATED,
+            validation={"probably_date": "2026-05-10"},
+        )
+        # Unsaved LogFile without `date`; expected value mirrors "probably_date".
+        self.assertEqual(
+            dates.get_report_date_from_log_file(log_file),
+            date(2026, 5, 10),
+        )
+
+
+class LogReportServiceTests(TestCase):
+    def test_populate_log_report_tables_aggregates_log_files(self):
+        from reports.models import MonthlyLogReport, WeeklyLogReport, YearlyLogReport
+
+        collection = Collection.objects.create(acron3="books", acron2="bk")
+        LogFile.objects.create(
+            collection=collection,
+            path="/tmp/access-1.log",
+            stat_result={},
+            hash="1" * 32,
+            status=choices.LOG_FILE_STATUS_QUEUED,
+            date=date(2026, 5, 10),
+            summary={"lines_parsed": 10, "valid_lines": 7},
+            validation={
+                "content": {
+                    "summary": {
+                        "ips": {"local": 1, "remote": 2, "unknown": 3},
+                    }
+                }
+            },
+        )
+        # Filter by the year and collection of the single file created above.
+        result = log_report.populate_log_report_tables(
+            year=2026,
+            collection_acron="books",
+        )
+
+        self.assertEqual(result, "Weekly: 1, Monthly: 1, Yearly: 1")
+        # .get() also asserts that exactly one row exists per report model.
+        weekly = WeeklyLogReport.objects.get(collection=collection)
+        monthly = MonthlyLogReport.objects.get(collection=collection)
+        yearly = YearlyLogReport.objects.get(collection=collection)
+        # All three period levels must carry the same single-file figures.
+        for report in [weekly, monthly, yearly]:
+            self.assertEqual(report.total_files, 1)
+            self.assertEqual(report.validated_files, 1)
+            self.assertEqual(report.lines_parsed, 10)
+            self.assertEqual(report.valid_lines, 7)
+            self.assertEqual(report.discarded_lines, 3)
+            self.assertEqual(report.ip_local_count, 1)
+            self.assertEqual(report.ip_remote_count, 2)
+            self.assertEqual(report.ip_unknown_count, 3)
diff --git a/reports/wagtail_hooks.py b/reports/wagtail_hooks.py
index b2aeac7..5e9e76b 100644
--- a/reports/wagtail_hooks.py
+++ b/reports/wagtail_hooks.py
@@ -1,10 +1,10 @@
 from django.contrib.auth import get_user_model
 from django.utils.translation import gettext_lazy as _
-from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
-from wagtail.snippets.models import register_snippet
 from wagtail.permission_policies.base import BasePermissionPolicy
+from wagtail.snippets.models import register_snippet
+from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 
-from reports.models import WeeklyLogReport, MonthlyLogReport, YearlyLogReport
+from reports.models import MonthlyLogReport, WeeklyLogReport, YearlyLogReport
 
 
 class ReadOnlyPermissionPolicy(BasePermissionPolicy):
diff --git a/requirements/base.txt b/requirements/base.txt
index 7b5ed61..5186a11 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -63,13 +63,13 @@ minio==7.2.7
 reverse-geocode==1.6  # https://pypi.org/project/reverse-geocode/
 
 # SciELO Log Validator
--e git+https://github.com/scieloorg/scielo_log_validator@2.0.0#egg=scielo_log_validator
+git+https://github.com/scieloorg/scielo_log_validator@2.0.0#egg=scielo_log_validator
 
 # SciELO Scholarly Data
--e git+https://github.com/scieloorg/scielo_scholarly_data@v0.1.4#egg=scielo_scholarly_data
+git+https://github.com/scieloorg/scielo_scholarly_data@v0.1.4#egg=scielo_scholarly_data
 
 # SciELO Usage COUNTER
--e git+https://github.com/scieloorg/scielo_usage_counter@2.0.0#egg=scielo_usage_counter
+git+https://github.com/scieloorg/scielo_usage_counter@2.0.0#egg=scielo_usage_counter
 
 # Device Detector
 device-detector==0.10  # https://github.com/thinkwelltwd/device_detector
diff --git a/requirements/production.txt b/requirements/production.txt
index 334e9f1..b580f8a 100644
--- a/requirements/production.txt
+++ b/requirements/production.txt
@@ -4,7 +4,7 @@
 
 gevent==23.9.1  # http://www.gevent.org/
 gunicorn==21.2.0 # https://github.com/benoitc/gunicorn
-psycopg2-binary==2.9.9  # https://github.com/psycopg/psycopg2
+psycopg2==2.9.9  # https://github.com/psycopg/psycopg2
 sentry-sdk==1.39.1  # https://github.com/getsentry/sentry-python
 
 # Django
diff --git a/resources/admin.py b/resources/admin.py
index 8c38f3f..846f6b4 100644
--- a/resources/admin.py
+++ b/resources/admin.py
@@ -1,3 +1 @@
-from django.contrib import admin
-
 # Register your models here.
diff --git a/resources/models.py b/resources/models.py
index 22663e2..8021d6f 100644
--- a/resources/models.py
+++ b/resources/models.py
@@ -4,6 +4,7 @@
 from django.utils.translation import gettext_lazy as _
 from wagtail.admin.panels import FieldPanel
 
+
 class RobotUserAgent(models.Model):
     SOURCE_ALL = "all"
     SOURCE_COUNTER = "counter"
@@ -23,7 +24,7 @@ class RobotUserAgent(models.Model):
     updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True)
 
     pattern = models.CharField(
-        verbose_name=_('Pattern'),
+        verbose_name=_("Pattern"),
         max_length=255,
         null=False,
         blank=False,
@@ -51,7 +52,7 @@ class RobotUserAgent(models.Model):
         blank=True,
     )
     last_changed = models.DateField(
-        verbose_name=_('Last Changed'),
+        verbose_name=_("Last Changed"),
         null=True,
         blank=True,
     )
@@ -101,20 +102,20 @@ class MMDB(models.Model):
     created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
     updated = models.DateTimeField(verbose_name=_("Last update date"), auto_now=True)
     id = models.CharField(
-        verbose_name=_('ID (HASH)'),
-        max_length=64, 
+        verbose_name=_("ID (HASH)"),
+        max_length=64,
         primary_key=True,
     )
     data = models.BinaryField(
-        verbose_name=_('MMDB Data'),
+        verbose_name=_("MMDB Data"),
     )
     url = models.URLField(
-        verbose_name=_('URL'),
+        verbose_name=_("URL"),
         max_length=255,
         null=True,
         blank=True,
     )
-    
+
     def save(self, *args, **kwargs):
         if self.data:
             self.id = MMDB.compute_hash(self.data)
@@ -125,4 +126,4 @@ def compute_hash(cls, data):
         return hashlib.sha256(data).hexdigest()
 
     def __str__(self):
-        return f'{self.id}'
+        return f"{self.id}"
diff --git a/resources/services.py b/resources/services.py
new file mode 100644
index 0000000..4f27b4f
--- /dev/null
+++ b/resources/services.py
@@ -0,0 +1,119 @@
+import logging
+
+from django.conf import settings
+
+from resources import models, utils
+
+
+def load_robots(url_robots=None):
+    """Sync COUNTER robot patterns from `url_robots`; return True on success."""
+    if not url_robots:
+        url_robots = settings.COUNTER_ROBOTS_URL
+        logging.warning("No robots URL provided. Using default: %s", url_robots)
+
+    try:
+        robots_data = utils.fetch_data(url_robots, data_type="json")
+    except Exception as e:
+        logging.error("Error downloading robots: %s", e)
+        return False
+
+    cleaned_robots_data = utils.clean_robots_list(robots_data)
+    fetched_patterns = set()
+
+    try:
+        # Pass 1: create or refresh every pattern present in the fetched list.
+        for entry in cleaned_robots_data:
+            pattern = entry.get("pattern")
+            fetched_patterns.add(pattern)
+
+            robot = models.RobotUserAgent.objects.filter(pattern=pattern).first()
+            if robot is None:
+                robot = models.RobotUserAgent(
+                    pattern=pattern,
+                    source_counter=True,
+                    source_scielo=False,
+                )
+            robot.source_counter = True
+            robot.is_active = True
+            robot.source_url = url_robots
+            robot.last_changed = entry.get("last_changed")
+            robot.save()
+            logging.debug("Robot saved: %s", robot)
+
+        # Pass 2: detach COUNTER-sourced patterns missing from the fetched list.
+        stale_robots = models.RobotUserAgent.objects.filter(
+            source_counter=True
+        ).exclude(pattern__in=fetched_patterns)
+
+        for stale_robot in stale_robots:
+            stale_robot.source_counter = False
+            stale_robot.source_url = None
+            stale_robot.last_changed = None
+            # Deactivate only when the SciELO source flag is not set either.
+            if not stale_robot.source_scielo:
+                stale_robot.is_active = False
+            stale_robot.save()
+            logging.debug(
+                "Robot deactivated or detached from COUNTER source: %s", stale_robot
+            )
+
+        return True
+
+    except Exception as e:
+        logging.error("Error saving robots: %s", e)
+        return False
+
+
+def load_geoip(url_geoip=None, validate=True):
+    """Download, optionally validate and store a GeoIP MMDB; return success."""
+    if url_geoip:
+        candidates = [url_geoip]
+    else:
+        candidates = utils.resolve_mmdb_url()
+        logging.info("No GeoIP URL provided. Will try candidates: %s", candidates)
+
+    data = None
+    resolved_url = None
+    for url in candidates:
+        try:
+            data = utils.fetch_data(url, data_type="content")
+            resolved_url = url
+            logging.info("GeoIP data downloaded from: %s", url)
+            break
+        except Exception as e:
+            logging.warning(
+                "Failed to download GeoIP from %s: %s. Trying next candidate.", url, e
+            )
+
+    if data is None:
+        logging.error(
+            "Could not download GeoIP data from any candidate URL: %s", candidates
+        )
+        return False
+
+    try:
+        mmdb_data = utils.decompress_gzip(data)
+    except Exception as e:
+        logging.error("Error decompressing GeoIP data: %s", e)
+        return False
+
+    if validate:
+        try:
+            utils.validate_geoip_data(mmdb_data)
+        except Exception as e:
+            logging.error("Error validating GeoIP data: %s", e)
+            return False
+
+    # The payload's hash is the lookup key, so identical data is stored once.
+    mmdb_hash = models.MMDB.compute_hash(mmdb_data)
+    try:
+        mmdb_obj = models.MMDB.objects.get(id=mmdb_hash)
+    except models.MMDB.DoesNotExist:
+        # Insert once with the URL set, instead of create() + a second save().
+        models.MMDB.objects.create(id=mmdb_hash, data=mmdb_data, url=resolved_url)
+    else:
+        logging.debug("GeoIP data already exists: %s", mmdb_obj)
+        mmdb_obj.save()  # unchanged behavior: an existing row is still re-saved
+
+    logging.info("GeoIP data saved (url=%s, hash=%s)", resolved_url, mmdb_hash)
+    return True
diff --git a/resources/tasks.py b/resources/tasks.py
index 0a87600..ea53c85 100644
--- a/resources/tasks.py
+++ b/resources/tasks.py
@@ -1,153 +1,12 @@
-import logging
-
-from django.conf import settings
-
 from config import celery_app
-
-from . import models, utils
+from resources import services
 
 
-@celery_app.task(bind=True, name='[Resources] Load Robots Data')
+@celery_app.task(bind=True, name="[Resources] Load Robots Data")
 def task_load_robots(self, url_robots=None):
-    """
-    Load robots from a given URL and save them to the database.
-    This function fetches robot data from a specified URL (or a default URL if none is provided),
-    cleans the data, and saves it to the database. If the robots already exist in the database,
-    their information is updated.
-    Args:
-        url_robots (str, optional): The URL to fetch the robots data from. Defaults to None.
-        user_id (int, optional): The ID of the user performing the task. Defaults to None.
-        username (str, optional): The username of the user performing the task. Defaults to None.
-    Returns:
-        bool: True if the robots were successfully loaded and saved, False otherwise.
-    Raises:
-        Exception: If there is an error fetching or saving the robots data.
-    Logs:
-        - Warning if no robots URL is provided.
-        - Error if there is an issue downloading or saving the robots.
-        - Debug information for each robot saved.
-    """
-    if not url_robots:
-        url_robots = settings.COUNTER_ROBOTS_URL
-        logging.warning(f'No robots URL provided. Using default: {url_robots}')
-
-    try:
-        robots_data = utils.fetch_data(url_robots, data_type='json')
-    except Exception as e:
-        logging.error(f'Error downloading robots: {e}')
-        return False
-
-    cleaned_robots_data = utils.clean_robots_list(robots_data)
-    fetched_patterns = set()
-
-    try:
-        for r_str in cleaned_robots_data:
-            pattern = r_str.get('pattern')
-            last_changed = r_str.get('last_changed')
-            fetched_patterns.add(pattern)
-
-            r_obj = models.RobotUserAgent.objects.filter(pattern=pattern).first()
-            created = r_obj is None
-
-            if created:
-                r_obj = models.RobotUserAgent(
-                    pattern=pattern,
-                    source_counter=True,
-                    source_scielo=False,
-                )
-            r_obj.source_counter = True
-            r_obj.is_active = True
-            r_obj.source_url = url_robots
-            r_obj.last_changed = last_changed
-
-            r_obj.save()
-            logging.debug(f'Robot saved: {r_obj}')
-
-        stale_counter_patterns = models.RobotUserAgent.objects.filter(
-            source_counter=True
-        ).exclude(pattern__in=fetched_patterns)
-
-        for r_obj in stale_counter_patterns:
-            r_obj.source_counter = False
-            r_obj.source_url = None
-            r_obj.last_changed = None
-            if not r_obj.source_scielo:
-                r_obj.is_active = False
-            r_obj.save()
-            logging.debug(f'Robot deactivated or detached from COUNTER source: {r_obj}')
-
-        return True
+    return services.load_robots(url_robots=url_robots)
 
-    except Exception as e:
-        logging.error(f'Error saving robots: {e}')
-        return False
 
-
-@celery_app.task(bind=True, name='[Resources] Load Geolocation Data')
+@celery_app.task(bind=True, name="[Resources] Load Geolocation Data")
 def task_load_geoip(self, url_geoip=None, validate=True):
-    """
-    Load GeoIP data from a specified URL, validate it, and save it to the database.
-
-    When ``url_geoip`` is not provided the task resolves the URL automatically:
-    it tries the current month first and, if the file is not yet available,
-    falls back to the previous month.
-
-    Args:
-        url_geoip (str, optional): Explicit URL to download. Defaults to None
-            (auto-resolved for the current/previous month).
-        validate (bool, optional): Whether to validate the GeoIP data. Defaults to True.
-    Returns:
-        bool: True if the GeoIP data was successfully loaded and saved, False otherwise.
-    """
-    if url_geoip:
-        candidates = [url_geoip]
-    else:
-        candidates = utils.resolve_mmdb_url()
-        logging.info('No GeoIP URL provided. Will try candidates: %s', candidates)
-
-    data = None
-    resolved_url = None
-    for url in candidates:
-        try:
-            data = utils.fetch_data(url, data_type='content')
-            resolved_url = url
-            logging.info('GeoIP data downloaded from: %s', url)
-            break
-        except Exception as e:
-            logging.warning(
-                'Failed to download GeoIP from %s: %s. Trying next candidate.', url, e
-            )
-
-    if data is None:
-        logging.error(
-            'Could not download GeoIP data from any candidate URL: %s', candidates
-        )
-        return False
-
-    try:
-        mmdb_data = utils.decompress_gzip(data)
-    except Exception as e:
-        logging.error(f'Error decompressing GeoIP data: {e}')
-        return False
-
-    if validate:
-        try:
-            utils.validate_geoip_data(mmdb_data)
-        except Exception as e:
-            logging.error(f'Error validating GeoIP data: {e}')
-            return False
-
-    mmdb_hash = models.MMDB.compute_hash(mmdb_data)
-
-    try:
-        mmdb_obj = models.MMDB.objects.get(id=mmdb_hash)
-        logging.debug(f'GeoIP data already exists: {mmdb_obj}')
-
-    except models.MMDB.DoesNotExist:
-        mmdb_obj = models.MMDB.objects.create(id=mmdb_hash, data=mmdb_data)
-        mmdb_obj.url = resolved_url
-
-    mmdb_obj.save()
-    logging.info('GeoIP data saved (url=%s, hash=%s)', resolved_url, mmdb_hash)
-
-    return True
+    return services.load_geoip(url_geoip=url_geoip, validate=validate)
diff --git a/resources/tests/__init__.py b/resources/tests/__init__.py
index 8b13789..e69de29 100644
--- a/resources/tests/__init__.py
+++ b/resources/tests/__init__.py
@@ -1 +0,0 @@
-
diff --git a/resources/tests/test_mmdb.py b/resources/tests/test_mmdb.py
new file mode 100644
index 0000000..8c73038
--- /dev/null
+++ b/resources/tests/test_mmdb.py
@@ -0,0 +1,22 @@
+from django.test import TestCase
+
+from resources.models import MMDB
+
+
+class MMDBModelTests(TestCase):
+    def test_save_computes_sha256_hash_as_pk(self):
+        data = b"fake mmdb binary data"
+        mmdb = MMDB(data=data, url="https://example.org/GeoLite2-Country.mmdb")
+        mmdb.save()
+        # No id was passed in; after save() the pk must match the payload hash.
+        self.assertEqual(mmdb.pk, MMDB.compute_hash(data))
+        self.assertEqual(MMDB.objects.count(), 1)
+
+    def test_different_data_produces_different_hash(self):
+        mmdb1 = MMDB(data=b"data-v1")
+        mmdb1.save()
+        mmdb2 = MMDB(data=b"data-v2")
+        mmdb2.save()
+        # Distinct payloads must yield distinct keys and two separate rows.
+        self.assertNotEqual(mmdb1.pk, mmdb2.pk)
+        self.assertEqual(MMDB.objects.count(), 2)
diff --git a/resources/tests/test_robots.py b/resources/tests/test_robots.py
index 330d4db..847832e 100644
--- a/resources/tests/test_robots.py
+++ b/resources/tests/test_robots.py
@@ -29,7 +29,9 @@ def test_get_all_patterns_only_returns_active_patterns(self):
             is_active=False,
         )
 
-        self.assertListEqual(list(models.RobotUserAgent.get_all_patterns()), [active.pattern])
+        self.assertListEqual(
+            list(models.RobotUserAgent.get_all_patterns()), [active.pattern]
+        )
 
     def test_get_patterns_can_filter_by_source(self):
         counter_only = models.RobotUserAgent.objects.create(
@@ -66,8 +68,7 @@ def test_get_patterns_rejects_invalid_source(self):
 
 
 class LoadRobotsTaskTests(TestCase):
-
-    @patch("resources.tasks.utils.fetch_data")
+    @patch("resources.services.utils.fetch_data")
     @override_settings(COUNTER_ROBOTS_URL="https://settings.example.org/robots.json")
     def test_task_load_robots_uses_settings_url_when_not_provided(
         self,
@@ -91,7 +92,7 @@ def test_task_load_robots_uses_settings_url_when_not_provided(
             "https://settings.example.org/robots.json",
         )
 
-    @patch("resources.tasks.utils.fetch_data")
+    @patch("resources.services.utils.fetch_data")
     def test_task_load_robots_marks_counter_source_and_deactivates_stale_counter_entries(
         self,
         mock_fetch_data,
@@ -124,7 +125,9 @@ def test_task_load_robots_marks_counter_source_and_deactivates_stale_counter_ent
         self.assertTrue(counter_bot.source_counter)
         self.assertFalse(counter_bot.source_scielo)
         self.assertTrue(counter_bot.is_active)
-        self.assertEqual(counter_bot.source_url, "https://counter.example.org/robots.json")
+        self.assertEqual(
+            counter_bot.source_url, "https://counter.example.org/robots.json"
+        )
 
         shared_bot.refresh_from_db()
         self.assertTrue(shared_bot.source_counter)
diff --git a/resources/utils.py b/resources/utils.py
index c8d58fe..f47edfd 100644
--- a/resources/utils.py
+++ b/resources/utils.py
@@ -10,7 +10,7 @@
 from django.conf import settings
 
 
-def fetch_data(url, data_type='json', max_retries=5, sleep_time=30):
+def fetch_data(url, data_type="json", max_retries=5, sleep_time=30):
     """
     Retrieves data from the given URL.
 
@@ -43,18 +43,14 @@ def fetch_data(url, data_type='json', max_retries=5, sleep_time=30):
             response.raise_for_status()
         except requests.exceptions.HTTPError:
             logging.warning(
-                'Failed to retrieve data from %s. Waiting %d seconds before retry %d of %d' % (
-                    url, 
-                    sleep_time, 
-                    t, 
-                    max_retries
-                )
+                "Failed to retrieve data from %s. Waiting %d seconds before retry %d of %d"
+                % (url, sleep_time, t, max_retries)
             )
             sleep(sleep_time)
         else:
-            if data_type == 'json':
+            if data_type == "json":
                 return response.json()
-            elif data_type == 'content':
+            elif data_type == "content":
                 return response.content
             else:
                 raise ValueError("Invalid data_type. Expected 'json' or 'content'.")
@@ -76,7 +72,7 @@ def clean_robots_list(robots):
     """
     cleaned_robots = []
     for r in robots:
-        if r.get('pattern') and r.get('last_changed'):
+        if r.get("pattern") and r.get("last_changed"):
             cleaned_robots.append(r)
     return cleaned_robots
 
@@ -86,7 +82,7 @@ def decompress_gzip(data):
         with gzip.GzipFile(fileobj=io.BytesIO(data)) as f:
             return f.read()
     except Exception as e:
-        raise Exception(f'Error decompressing data: {e}')
+        raise Exception(f"Error decompressing data: {e}")
 
 
 def validate_geoip_data(data):
@@ -96,7 +92,7 @@ def validate_geoip_data(data):
             temp_file.flush()
             reader = geoip2.database.Reader(temp_file.name)
     except Exception as e:
-        raise Exception(f'Error validating GeoIP data: {e}')
+        raise Exception(f"Error validating GeoIP data: {e}")
     else:
         reader.close()
         return True
diff --git a/resources/wagtail_hooks.py b/resources/wagtail_hooks.py
index c347b22..808876a 100644
--- a/resources/wagtail_hooks.py
+++ b/resources/wagtail_hooks.py
@@ -1,10 +1,10 @@
 from django.utils.translation import gettext_lazy as _
-from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 from wagtail.snippets.models import register_snippet
+from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 
 from config.menu import get_menu_order
 
-from .models import (RobotUserAgent, MMDB)
+from resources.models import MMDB, RobotUserAgent
 
 
 class RobotUserAgentSnippetViewSet(SnippetViewSet):
@@ -52,11 +52,14 @@ class MMDBSnippetViewSet(SnippetViewSet):
 
 
 class ResourcesSnippetViewSetGroup(SnippetViewSetGroup):
-    menu_name = 'resources'
+    menu_name = "resources"
     menu_label = _("Resources")
     menu_icon = "folder-open-inverse"
     menu_order = get_menu_order("resources")
-    items = (RobotUserAgentSnippetViewSet, MMDBSnippetViewSet,)
+    items = (
+        RobotUserAgentSnippetViewSet,
+        MMDBSnippetViewSet,
+    )
 
 
 register_snippet(ResourcesSnippetViewSetGroup)
diff --git a/setup.cfg b/setup.cfg
index c4ae862..7b8b505 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,7 @@
 [flake8]
 max-line-length = 120
 exclude = .tox,.git,*/migrations/*,*/static/CACHE/*,docs,node_modules,venv
+ignore = E203, W503
 
 [pycodestyle]
 max-line-length = 120
diff --git a/source/__init__.py b/source/__init__.py
index 8b13789..e69de29 100644
--- a/source/__init__.py
+++ b/source/__init__.py
@@ -1 +0,0 @@
-
diff --git a/source/migrations/__init__.py b/source/migrations/__init__.py
index 8b13789..e69de29 100644
--- a/source/migrations/__init__.py
+++ b/source/migrations/__init__.py
@@ -1 +0,0 @@
-
diff --git a/source/models.py b/source/models.py
index 48d3e00..c88e9f0 100644
--- a/source/models.py
+++ b/source/models.py
@@ -1,4 +1,5 @@
 from django.db import models
+from django.db.models import Q
 from django.utils.translation import gettext_lazy as _
 
 from collection.models import Collection
@@ -144,6 +145,46 @@ class Source(CommonControlField):
     def __str__(self):
         return f"{self.collection.acron3} - {self.source_type} - {self.source_id}"
 
+    @classmethod
+    def delete_book_source_by_id(cls, collection, book_id):
+        """Delete the book sources of ``collection`` whose id is ``str(book_id)``."""
+        criteria = {"source_type": cls.SOURCE_TYPE_BOOK, "source_id": str(book_id)}
+        doomed = cls.objects.filter(collection=collection, **criteria)
+        # The caller receives whatever QuerySet.delete() reports for these rows.
+        return doomed.delete()
+
+    @classmethod
+    def find_journal_by_issns(cls, collection, issns):
+        """Return the first journal source of ``collection`` matching one of ``issns``.
+
+        ISSNs are tried in the given order, skipping falsy entries; the result
+        is ``None`` when none of them matches.
+        """
+        journals = cls.objects.filter(
+            collection=collection,
+            source_type=cls.SOURCE_TYPE_JOURNAL,
+        )
+        for candidate in filter(None, issns or []):
+            by_field = Q(scielo_issn=candidate) | Q(source_id=candidate)
+            by_identifier = Q(identifiers__electronic_issn=candidate)
+            by_identifier |= Q(identifiers__print_issn=candidate)
+            by_identifier |= Q(identifiers__scielo_issn=candidate)
+            match = journals.filter(by_field | by_identifier).first()
+            if match:
+                return match
+        return None
+
+    @classmethod
+    def find_journal_by_acronym(cls, collection, acronym):
+        """Return the first journal source of ``collection`` with this acronym.
+
+        Gives ``None`` for a falsy ``acronym`` or when no journal matches.
+        """
+        if acronym:
+            journals = cls.objects.filter(collection=collection, acronym=acronym)
+            return journals.filter(source_type=cls.SOURCE_TYPE_JOURNAL).first()
+        return None
+
     @staticmethod
     def _extract_issns(identifiers):
         if not isinstance(identifiers, dict):
diff --git a/source/services/__init__.py b/source/services/__init__.py
index 8b13789..e69de29 100644
--- a/source/services/__init__.py
+++ b/source/services/__init__.py
@@ -1 +0,0 @@
-
diff --git a/source/services/books.py b/source/services/book.py
similarity index 73%
rename from source/services/books.py
rename to source/services/book.py
index df9bd4d..6fc2016 100644
--- a/source/services/books.py
+++ b/source/services/book.py
@@ -1,14 +1,7 @@
-from collection.models import Collection
+from core.utils.metadata import as_list, compact_dict, normalize_year
 from source.models import Source
 
 
-BOOKS_COLLECTION_ACRONYM = "books"
-
-
-def get_books_collection(acronym=BOOKS_COLLECTION_ACRONYM):
-    return Collection.objects.get(acron3=acronym)
-
-
 def upsert_monograph_source(
     payload,
     collection,
@@ -34,12 +27,12 @@ def upsert_monograph_source(
         source.acronym = ""
         source.title = payload.get("title") or str(payload.get("id"))
         source.identifiers = _build_source_identifiers(payload)
-        source.publisher_name = _as_list(payload.get("publisher"))
+        source.publisher_name = as_list(payload.get("publisher"))
         source.subject_areas = []
         source.wos_subject_areas = []
         source.default_lang = payload.get("language") or None
         source.publication_date = payload.get("publication_date") or None
-        source.publication_year = _normalize_year(payload.get("year"))
+        source.publication_year = normalize_year(payload.get("year"))
         source.access_type = _normalize_access_type(payload.get("is_comercial"))
         source.extra_data = _build_source_extra_data(
             payload,
@@ -54,14 +47,6 @@ def upsert_monograph_source(
     return source
 
 
-def delete_book_source(collection, book_id):
-    return Source.objects.filter(
-        collection=collection,
-        source_type=Source.SOURCE_TYPE_BOOK,
-        source_id=str(book_id),
-    ).delete()
-
-
 def _build_source_identifiers(payload):
     identifiers = {
         "book_id": str(payload.get("id")) if payload.get("id") is not None else None,
@@ -69,7 +54,7 @@ def _build_source_identifiers(payload):
         "eisbn": payload.get("eisbn"),
         "doi": payload.get("doi_number"),
     }
-    return _compact_dict(identifiers)
+    return compact_dict(identifiers)
 
 
 def _build_source_extra_data(payload, source_url=None, last_seq=None):
@@ -96,23 +81,7 @@ def _build_source_extra_data(payload, source_url=None, last_seq=None):
         "primary_descriptor": payload.get("primary_descriptor"),
         "translated_primary_descriptors": payload.get("translated_primary_descriptors"),
     }
-    return _compact_dict(extra_data)
-
-
-def _as_list(value):
-    if not value:
-        return []
-
-    if isinstance(value, list):
-        return value
-
-    return [value]
-
-
-def _normalize_year(value):
-    if value in (None, ""):
-        return None
-    return str(value)[:4]
+    return compact_dict(extra_data)
 
 
 def _normalize_access_type(value):
@@ -126,12 +95,6 @@ def _normalize_access_type(value):
         if normalized in {"false", "0", "no", "n", "nao", "não"}:
             return Source.ACCESS_TYPE_OPEN_ACCESS
 
-    return Source.ACCESS_TYPE_COMMERCIAL if bool(value) else Source.ACCESS_TYPE_OPEN_ACCESS
-
-
-def _compact_dict(data):
-    return {
-        key: value
-        for key, value in data.items()
-        if value not in (None, "", [], {}, ())
-    }
+    return (
+        Source.ACCESS_TYPE_COMMERCIAL if bool(value) else Source.ACCESS_TYPE_OPEN_ACCESS
+    )
diff --git a/source/services/journal.py b/source/services/journal.py
new file mode 100644
index 0000000..273c912
--- /dev/null
+++ b/source/services/journal.py
@@ -0,0 +1,56 @@
+from core.utils.metadata import as_list, compact_dict, get_value
+from source.models import Source
+
+
+def upsert_journal_source(
+    journal,
+    collection,
+    user=None,
+    force_update=True,
+    load_mode=None,
+):
+    """Create or refresh the journal ``Source`` keyed by the journal's SciELO ISSN.
+
+    ``journal`` is read through ``get_value``. Without a ``scielo_issn`` nothing
+    is stored and ``None`` is returned; otherwise the saved source is returned.
+    Metadata is rewritten only for a newly created row or when ``force_update``
+    is true. A given ``user`` becomes ``creator`` of new rows and ``updated_by``.
+    """
+    issn = get_value(journal, "scielo_issn")
+    if not issn:
+        return None
+
+    lookup = {"source_type": Source.SOURCE_TYPE_JOURNAL, "source_id": issn}
+    source, is_new = Source.objects.get_or_create(collection=collection, **lookup)
+
+    if is_new or force_update:
+        source.scielo_issn = issn
+        source.acronym = get_value(journal, "acronym") or ""
+        source.title = get_value(journal, "title") or issn
+        source.identifiers = _build_source_identifiers(journal)
+        for field in ("publisher_name", "subject_areas", "wos_subject_areas"):
+            setattr(source, field, as_list(get_value(journal, field)))
+        # These three fields are always reset for journal sources.
+        source.default_lang = None
+        source.publication_date = None
+        source.publication_year = None
+        # Origin of this load; empty entries are left to compact_dict.
+        origin = {"collection_acronym": get_value(journal, "collection_acronym")}
+        source.extra_data = compact_dict({**origin, "load_mode": load_mode})
+
+    if user:
+        if is_new:
+            source.creator = user
+        source.updated_by = user
+
+    source.save()
+    return source
+
+
+def _build_source_identifiers(journal):
+    """Collect the journal's ISSN values into a dict passed through ``compact_dict``."""
+    issn_keys = ("electronic_issn", "print_issn", "scielo_issn")
+    # Key order mirrors the order in which the values are read from ``journal``.
+    return compact_dict(
+        {issn_key: get_value(journal, issn_key) for issn_key in issn_keys}
+    )
diff --git a/source/services/journals.py b/source/services/journals.py
deleted file mode 100644
index ac133f6..0000000
--- a/source/services/journals.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from django.db.models import Q
-
-from collection.models import Collection
-from source.models import Source
-
-
-def get_collection(acronym):
-    return Collection.objects.filter(acron3=acronym).first()
-
-
-def upsert_journal_source(
-    journal,
-    collection,
-    user=None,
-    force_update=True,
-    load_mode=None,
-):
-    scielo_issn = _value(journal, "scielo_issn")
-    if not scielo_issn:
-        return None
-
-    source, created = Source.objects.get_or_create(
-        collection=collection,
-        source_type=Source.SOURCE_TYPE_JOURNAL,
-        source_id=scielo_issn,
-    )
-
-    if created and user:
-        source.creator = user
-
-    if created or force_update:
-        source.scielo_issn = scielo_issn
-        source.acronym = _value(journal, "acronym") or ""
-        source.title = _value(journal, "title") or scielo_issn
-        source.identifiers = _build_source_identifiers(journal)
-        source.publisher_name = _as_list(_value(journal, "publisher_name"))
-        source.subject_areas = _as_list(_value(journal, "subject_areas"))
-        source.wos_subject_areas = _as_list(_value(journal, "wos_subject_areas"))
-        source.default_lang = None
-        source.publication_date = None
-        source.publication_year = None
-        source.extra_data = _compact_dict(
-            {
-                "collection_acronym": _value(journal, "collection_acronym"),
-                "load_mode": load_mode,
-            }
-        )
-
-    if user:
-        source.updated_by = user
-
-    source.save()
-    return source
-
-
-def find_journal_source_by_issns(collection, issns):
-    for issn in filter(None, issns or []):
-        source = (
-            Source.objects.filter(
-                collection=collection,
-                source_type=Source.SOURCE_TYPE_JOURNAL,
-            )
-            .filter(
-                Q(scielo_issn=issn)
-                | Q(source_id=issn)
-                | Q(identifiers__electronic_issn=issn)
-                | Q(identifiers__print_issn=issn)
-                | Q(identifiers__scielo_issn=issn)
-            )
-            .first()
-        )
-        if source:
-            return source
-    return None
-
-
-def find_journal_source_by_acronym(collection, acronym):
-    if not acronym:
-        return None
-
-    return Source.objects.filter(
-        collection=collection,
-        source_type=Source.SOURCE_TYPE_JOURNAL,
-        acronym=acronym,
-    ).first()
-
-
-def _build_source_identifiers(journal):
-    identifiers = {
-        "electronic_issn": _value(journal, "electronic_issn"),
-        "print_issn": _value(journal, "print_issn"),
-        "scielo_issn": _value(journal, "scielo_issn"),
-    }
-    return _compact_dict(identifiers)
-
-
-def _as_list(value):
-    if not value:
-        return []
-
-    if isinstance(value, list):
-        return value
-
-    return [value]
-
-
-def _value(data, key, default=None):
-    if isinstance(data, dict):
-        return data.get(key, default)
-    return getattr(data, key, default)
-
-
-def _compact_dict(data):
-    return {
-        key: value
-        for key, value in data.items()
-        if value not in (None, "", [], {}, ())
-    }
diff --git a/source/services/loaders.py b/source/services/loaders.py
new file mode 100644
index 0000000..4c99238
--- /dev/null
+++ b/source/services/loaders.py
@@ -0,0 +1,104 @@
+import logging
+
+from django.conf import settings
+
+from collection.models import Collection
+from core.collectors import articlemeta as articlemeta_collector
+from core.collectors import scielo_books as scielo_books_collector
+from source.models import Source
+from source.services import book as books_service
+from source.services import journal as journal_service
+
+
+def load_sources_from_article_meta(
+    collections=None,
+    force_update=True,
+    user=None,
+    mode="thrift",
+):
+    """Upsert a journal source for each journal yielded by the Article Meta collector.
+
+    Journals of unregistered collections are logged as errors and skipped; a
+    ``None`` result from the upsert is logged as a warning. Returns ``True``.
+    """
+    for collection_code in collections or Collection.acron3_list():
+        logging.info(
+            "Loading sources from Article Meta. Collection: %s, Mode: %s",
+            collection_code,
+            mode,
+        )
+        for journal in articlemeta_collector.iter_journals(
+            collection=collection_code,
+            mode=mode,
+        ):
+            acronym = journal.collection_acronym
+            collection = Collection.objects.filter(acron3=acronym).first()
+            if not collection:
+                logging.error("Collection %s does not exist", acronym)
+                continue
+            source = journal_service.upsert_journal_source(
+                journal,
+                collection=collection,
+                user=user,
+                force_update=force_update,
+                load_mode=mode,
+            )
+            if source is None:
+                logging.warning("Journal of %s skipped: nothing to upsert", acronym)
+                continue
+            logging.info(
+                "Source %s upserted for collection %s",
+                source.source_id,
+                collection.acron3,
+            )
+
+    return True
+
+
+def load_sources_from_scielo_books(
+    collection="books",
+    db_name=settings.SCIELO_BOOKS_DB_NAME,
+    since=0,
+    limit=settings.SCIELO_BOOKS_LIMIT,
+    force_update=True,
+    headers=None,
+    base_url=None,
+    user=None,
+):
+    """Sync book sources of ``collection`` from the SciELO Books change documents.
+
+    Entries flagged as deleted remove the matching book source, ``Monograph``
+    payloads are upserted and any other document is ignored. Raises
+    ``Collection.DoesNotExist`` when no collection has that ``acron3``.
+    Returns ``True`` once every yielded document has been handled.
+    """
+    target = Collection.objects.get(acron3=collection)
+
+    logging.info(
+        "Loading sources from SciELO Books. Collection: %s, DB: %s, Since: %s, Limit: %s",
+        collection,
+        db_name,
+        since,
+        limit,
+    )
+
+    feed = scielo_books_collector.iter_change_documents(
+        base_url=base_url, db_name=db_name, since=since, limit=limit, headers=headers
+    )
+    for entry in feed:
+        change = entry["change"]
+        if entry["deleted"]:
+            Source.delete_book_source_by_id(target, change.get("id"))
+        else:
+            document = entry["payload"] or {}
+            if document.get("TYPE") == "Monograph":
+                books_service.upsert_monograph_source(
+                    document,
+                    collection=target,
+                    user=user,
+                    force_update=force_update,
+                    source_url=entry.get("source_url"),
+                    last_seq=change.get("seq"),
+                )
+
+    return True
diff --git a/source/tasks.py b/source/tasks.py
index 6b7eeb2..9ff1c74 100644
--- a/source/tasks.py
+++ b/source/tasks.py
@@ -1,106 +1,8 @@
-import logging
-
 from django.conf import settings
 
-from collection.models import Collection
 from config import celery_app
-from core.collectors import articlemeta as articlemeta_collector
-from core.collectors import scielo_books as scielo_books_collector
 from core.utils.request_utils import _get_user
-from source.services import books as books_service
-from source.services import journals as journal_service
-
-
-def load_sources_from_article_meta(
-    collections=None,
-    force_update=True,
-    user=None,
-    mode="thrift",
-):
-    collection_codes = collections or Collection.acron3_list()
-
-    for collection_code in collection_codes:
-        logging.info(
-            "Loading sources from Article Meta. Collection: %s, Mode: %s",
-            collection_code,
-            mode,
-        )
-
-        for journal in articlemeta_collector.iter_journals(
-            collection=collection_code,
-            mode=mode,
-        ):
-            collection = journal_service.get_collection(journal.collection_acronym)
-            if not collection:
-                logging.error(
-                    "Collection %s does not exist",
-                    journal.collection_acronym,
-                )
-                continue
-
-            source = journal_service.upsert_journal_source(
-                journal,
-                collection=collection,
-                user=user,
-                force_update=force_update,
-                load_mode=mode,
-            )
-            logging.info(
-                "Source %s upserted for collection %s",
-                source.source_id if source else None,
-                collection.acron3,
-            )
-
-    return True
-
-
-def load_sources_from_scielo_books(
-    collection="books",
-    db_name=settings.SCIELO_BOOKS_DB_NAME,
-    since=0,
-    limit=settings.SCIELO_BOOKS_LIMIT,
-    force_update=True,
-    headers=None,
-    base_url=None,
-    user=None,
-):
-    collection_obj = books_service.get_books_collection(collection)
-
-    logging.info(
-        "Loading sources from SciELO Books. Collection: %s, DB: %s, Since: %s, Limit: %s",
-        collection,
-        db_name,
-        since,
-        limit,
-    )
-
-    for item in scielo_books_collector.iter_change_documents(
-        base_url=base_url,
-        db_name=db_name,
-        since=since,
-        limit=limit,
-        headers=headers,
-    ):
-        change = item["change"]
-
-        if item["deleted"]:
-            books_service.delete_book_source(collection_obj, change.get("id"))
-            continue
-
-        payload = item["payload"] or {}
-        if payload.get("TYPE") != "Monograph":
-            continue
-
-        books_service.upsert_monograph_source(
-            payload,
-            collection=collection_obj,
-            user=user,
-            force_update=force_update,
-            source_url=item.get("source_url"),
-            last_seq=change.get("seq"),
-        )
-
-    return True
+from source.services import loaders
 
 
 @celery_app.task(bind=True, name="[Metadata] Sync Sources (Article Meta)", queue="load")
@@ -113,7 +15,7 @@ def task_load_sources_from_article_meta(
     mode="thrift",
 ):
     user = _get_user(self.request, username=username, user_id=user_id)
-    return load_sources_from_article_meta(
+    return loaders.load_sources_from_article_meta(
         collections=collections,
         force_update=force_update,
         user=user,
@@ -135,7 +37,7 @@ def task_load_sources_from_scielo_books(
     username=None,
 ):
     user = _get_user(self.request, username=username, user_id=user_id)
-    return load_sources_from_scielo_books(
+    return loaders.load_sources_from_scielo_books(
         collection=collection,
         db_name=db_name,
         since=since,
diff --git a/source/tests/__init__.py b/source/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/source/tests/test_models.py b/source/tests/test_models.py
new file mode 100644
index 0000000..d1206a9
--- /dev/null
+++ b/source/tests/test_models.py
@@ -0,0 +1,66 @@
+from django.test import TestCase
+
+from collection.models import Collection
+from source.models import Source
+
+
+class SourceLookupTests(TestCase):
+    """Covers the journal lookups and the book deletion helper of ``Source``."""
+
+    def test_find_journal_by_issns_searches_source_and_identifier_fields(self):
+        scl = Collection.objects.create(acron3="scl", acron2="sc")
+        identifiers = {
+            "electronic_issn": "2345-6789",
+            "print_issn": "8765-4321",
+            "scielo_issn": "3456-7890",
+        }
+        journal = Source.objects.create(
+            collection=scl,
+            source_type=Source.SOURCE_TYPE_JOURNAL,
+            source_id="1234-5678",
+            scielo_issn="1234-5678",
+            acronym="testjou",
+            title="Test Journal",
+            identifiers=identifiers,
+        )
+
+        # The model-level ISSN first, then every ISSN kept under ``identifiers``.
+        for issn in ["1234-5678", *identifiers.values()]:
+            found = Source.find_journal_by_issns(scl, [issn])
+            self.assertEqual(found, journal)
+
+        self.assertIsNone(Source.find_journal_by_issns(scl, ["0000-0000"]))
+
+    def test_find_journal_by_acronym(self):
+        scl = Collection.objects.create(acron3="scl", acron2="sc")
+        journal = Source.objects.create(
+            collection=scl,
+            source_type=Source.SOURCE_TYPE_JOURNAL,
+            source_id="1234-5678",
+            acronym="testjou",
+            title="Test Journal",
+        )
+
+        self.assertEqual(Source.find_journal_by_acronym(scl, "testjou"), journal)
+        # Both an unknown acronym and an empty one must come back as None.
+        for unmatched in ("missing", ""):
+            self.assertIsNone(Source.find_journal_by_acronym(scl, unmatched))
+
+    def test_delete_book_source_by_id(self):
+        books = Collection.objects.create(acron3="books", acron2="bk")
+        other = Collection.objects.create(acron3="other", acron2="ot")
+
+        # The same book id exists in both collections; only one may be removed.
+        for owner in (books, other):
+            Source.objects.create(
+                collection=owner,
+                source_type=Source.SOURCE_TYPE_BOOK,
+                source_id="abcd1",
+                title="Book",
+            )
+
+        removed, _ = Source.delete_book_source_by_id(books, "abcd1")
+
+        self.assertEqual(removed, 1)
+        self.assertFalse(Source.objects.filter(collection=books).exists())
+        self.assertTrue(Source.objects.filter(collection=other).exists())
diff --git a/source/tests.py b/source/tests/test_services.py
similarity index 68%
rename from source/tests.py
rename to source/tests/test_services.py
index a182f4e..4b72a08 100644
--- a/source/tests.py
+++ b/source/tests/test_services.py
@@ -1,23 +1,12 @@
 from django.test import TestCase
 
 from collection.models import Collection
-
-from .models import Source
-from .services import books as books_service
-from .services import journals as journal_service
+from source.models import Source
+from source.services import book as books_service
+from source.services import journal as journal_service
 
 
 class SourceMetadataTests(TestCase):
-    def test_source_type_choices_include_scielo_non_journal_sources(self):
-        self.assertIn(
-            (Source.SOURCE_TYPE_PREPRINT_SERVER, "Preprint Server"),
-            [(value, str(label)) for value, label in Source.SOURCE_TYPE_CHOICES],
-        )
-        self.assertIn(
-            (Source.SOURCE_TYPE_DATA_REPOSITORY, "Data Repository"),
-            [(value, str(label)) for value, label in Source.SOURCE_TYPE_CHOICES],
-        )
-
     def test_metadata_exposes_generic_and_journal_fields(self):
         collection = Collection.objects.create(acron3="scl", acron2="sc")
         Source.objects.create(
@@ -50,6 +39,8 @@ def test_metadata_exposes_generic_and_journal_fields(self):
         self.assertEqual(metadata[0]["issns"], {"1234-5678", "8765-4321"})
         self.assertEqual(metadata[0]["title"], "Test Journal")
 
+
+class BookSourceServiceTests(TestCase):
     def test_upsert_monograph_source_maps_scielo_books_payload(self):
         collection = Collection.objects.create(acron3="books", acron2="bk")
 
@@ -78,27 +69,8 @@ def test_upsert_monograph_source_maps_scielo_books_payload(self):
         self.assertEqual(source.publication_year, "2024")
         self.assertEqual(source.access_type, Source.ACCESS_TYPE_OPEN_ACCESS)
 
-    def test_upsert_monograph_source_accepts_long_real_world_title(self):
-        collection = Collection.objects.create(acron3="books", acron2="bk")
-        title = (
-            "O Estado da Arte sobre Refugiados, Deslocados Internos, "
-            "Deslocados Ambientais e Apatridas no Brasil: atualizacao do "
-            "Diretorio Nacional do ACNUR de teses, dissertacoes, trabalhos "
-            "de conclusao de curso de graduacao em Joao Pessoa (Paraiba) e "
-            "artigos (2007 a 2017)"
-        )
-
-        source = books_service.upsert_monograph_source(
-            {
-                "TYPE": "Monograph",
-                "id": "9zzts",
-                "title": title,
-            },
-            collection=collection,
-        )
-
-        self.assertEqual(source.title, title)
 
+class JournalSourceServiceTests(TestCase):
     def test_upsert_journal_source_maps_articlemeta_payload(self):
         collection = Collection.objects.create(acron3="scl", acron2="sc")
 
@@ -123,11 +95,3 @@ def test_upsert_journal_source_maps_articlemeta_payload(self):
         self.assertEqual(source.identifiers["electronic_issn"], "1234-5678")
         self.assertEqual(source.publisher_name, ["SciELO"])
         self.assertEqual(source.extra_data["load_mode"], "thrift")
-        self.assertEqual(
-            journal_service.find_journal_source_by_issns(collection, ["8765-4321"]).pk,
-            source.pk,
-        )
-        self.assertEqual(
-            journal_service.find_journal_source_by_acronym(collection, "testjou").pk,
-            source.pk,
-        )
diff --git a/source/wagtail_hooks.py b/source/wagtail_hooks.py
index 5ffad62..4d62334 100644
--- a/source/wagtail_hooks.py
+++ b/source/wagtail_hooks.py
@@ -1,7 +1,7 @@
 from django.utils.translation import gettext_lazy as _
 from wagtail.snippets.views.snippets import SnippetViewSet
 
-from .models import Source
+from source.models import Source
 
 
 class SourceSnippetViewSet(SnippetViewSet):
diff --git a/tracker/choices.py b/tracker/choices.py
index dfc562c..81abcb5 100644
--- a/tracker/choices.py
+++ b/tracker/choices.py
@@ -1,11 +1,10 @@
 from django.utils.translation import gettext_lazy as _
 
-
-LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA = 'MET'
-LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT = 'DOC'
-LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE = 'SRC'
-LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION = 'URL'
-LOG_FILE_DISCARDED_LINE_REASON_DATABASE_ERROR = 'DBE'
+LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA = "MET"
+LOG_FILE_DISCARDED_LINE_REASON_MISSING_DOCUMENT = "DOC"
+LOG_FILE_DISCARDED_LINE_REASON_MISSING_SOURCE = "SRC"
+LOG_FILE_DISCARDED_LINE_REASON_URL_TRANSLATION = "URL"
+LOG_FILE_DISCARDED_LINE_REASON_DATABASE_ERROR = "DBE"
 
 LOG_FILE_DISCARDED_LINE_REASON = [
     (LOG_FILE_DISCARDED_LINE_REASON_MISSING_METADATA, _("Missing Metadata")),
diff --git a/tracker/models.py b/tracker/models.py
index a394ed6..0654c31 100644
--- a/tracker/models.py
+++ b/tracker/models.py
@@ -3,15 +3,15 @@
 
 from log_manager.models import LogFile
 from tracker import choices
-from .exceptions import LogFileDiscardedLineCreateError
+from tracker.exceptions import LogFileDiscardedLineCreateError
 
 
 class LogFileDiscardedLine(models.Model):
     created = models.DateTimeField(verbose_name=_("Creation date"), auto_now_add=True)
     log_file = models.ForeignKey(
-        LogFile, 
-        on_delete=models.CASCADE, 
-        null=False, 
+        LogFile,
+        on_delete=models.CASCADE,
+        null=False,
         blank=False,
         db_index=True,
     )
@@ -31,10 +31,7 @@ class LogFileDiscardedLine(models.Model):
         null=True,
         blank=True,
     )
-    handled = models.BooleanField(
-        _("Handled"),
-        default=False
-    )
+    handled = models.BooleanField(_("Handled"), default=False)
 
     @classmethod
     def create(cls, log_file, error_type, data, message, save=False):
@@ -54,6 +51,3 @@ def create(cls, log_file, error_type, data, message, save=False):
 
     def __str__(self):
         return f"{self.data} - {self.message}"
-
-
-
diff --git a/tracker/wagtail_hooks.py b/tracker/wagtail_hooks.py
index 1ceb9c7..f62f63f 100644
--- a/tracker/wagtail_hooks.py
+++ b/tracker/wagtail_hooks.py
@@ -1,16 +1,16 @@
 from django.utils.translation import gettext as _
-from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 from wagtail.snippets.models import register_snippet
+from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup
 
 from config.menu import get_menu_order
 
-from .models import LogFileDiscardedLine
+from tracker.models import LogFileDiscardedLine
 
 
 class LogFileDiscardedLineSnippetViewSet(SnippetViewSet):
     model = LogFileDiscardedLine
     menu_label = _("Discarded Lines")
-    icon = 'warning'
+    icon = "warning"
     menu_order = get_menu_order("tracker")
     add_to_admin_menu = False
 
@@ -20,12 +20,7 @@ class LogFileDiscardedLineSnippetViewSet(SnippetViewSet):
         "message",
         "handled",
     )
-    list_filter = (
-        "log_file__collection",
-        "log_file", 
-        "handled",
-        "error_type"
-    )
+    list_filter = ("log_file__collection", "log_file", "handled", "error_type")
     search_fields = (
         "data",
         "message",
@@ -39,17 +34,13 @@ class LogFileDiscardedLineSnippetViewSet(SnippetViewSet):
     )
 
 
-
-
 class TrackerSnippetViewSetGroup(SnippetViewSetGroup):
-    menu_name = 'tracker'
+    menu_name = "tracker"
     menu_label = _("Tracker")
     icon = "folder-open-inverse"
     menu_order = get_menu_order("tracker")
-    
-    items = (
-        LogFileDiscardedLineSnippetViewSet,
-    )
+
+    items = (LogFileDiscardedLineSnippetViewSet,)
 
 
 register_snippet(TrackerSnippetViewSetGroup)