Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.pepej.truncator.indexer
- import com.pepej.truncator.database.ProcessedLogs
- import io.ktor.client.HttpClient
- import io.ktor.client.engine.cio.CIO
- import io.ktor.client.plugins.HttpTimeout
- import io.ktor.client.plugins.auth.Auth
- import io.ktor.client.plugins.auth.providers.BasicAuthCredentials
- import io.ktor.client.plugins.auth.providers.basic
- import io.ktor.client.request.get
- import io.ktor.client.statement.bodyAsBytes
- import io.ktor.client.statement.bodyAsText
- import io.ktor.http.isSuccess
- import kotlinx.coroutines.Dispatchers
- import kotlinx.coroutines.async
- import kotlinx.coroutines.awaitAll
- import kotlinx.coroutines.coroutineScope
- import kotlinx.coroutines.delay
- import kotlinx.coroutines.withContext
- import kotlinx.datetime.Clock.System.now
- import kotlinx.datetime.LocalDate
- import kotlinx.datetime.TimeZone
- import kotlinx.datetime.toLocalDateTime
- import kotlinx.io.IOException
- import okio.buffer
- import okio.gzip
- import okio.sink
- import okio.source
- import org.jetbrains.exposed.sql.and
- import org.jetbrains.exposed.sql.insert
- import org.jetbrains.exposed.sql.selectAll
- import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction
- import org.jsoup.Jsoup
- import org.slf4j.LoggerFactory
- import java.io.File
- import java.net.URL
/** Base address of the log server; the per-server listing URL is built from it. */
private const val BASE_URL: String = "https://privatelogs.mcskill.net"

// NOTE(review): the basic-auth credentials below are embedded in source — consider
// loading them from configuration or the environment instead.

/** User name supplied to the HTTP client's basic-auth provider. */
private const val USERNAME: String = "Truncator"

/** Password supplied to the HTTP client's basic-auth provider. */
private const val PASSWORD: String = "PASSWORD"

/** File-wide logger, registered under the name "LogsDownloader". */
private val log = LoggerFactory.getLogger("LogsDownloader")
/**
 * Pattern for a `yyyy<sep>mm<sep>dd` date, where each separator is `.`, `-` or `_`.
 * Kept at file level so it is compiled once rather than on every call.
 */
private val FILENAME_DATE_REGEX = Regex("(\\d{4})[.\\-_](\\d{2})[.\\-_](\\d{2})")

/**
 * Extracts the first date-like token (`yyyy.mm.dd`, `yyyy-mm-dd`, `yyyy_mm_dd`) from [filename].
 *
 * @param filename any string expected to embed a date, typically a log file name.
 * @return the parsed date, or `null` when no such token is present or the digits
 *         do not form a valid calendar date (for example month 13).
 */
fun parseFilenameDate(filename: String): LocalDate? {
    val match = FILENAME_DATE_REGEX.find(filename) ?: return null
    val (year, month, day) = match.destructured
    return try {
        LocalDate(year.toInt(), month.toInt(), day.toInt())
    } catch (e: IllegalArgumentException) {
        // Out-of-range components (and NumberFormatException, a subtype) mean "no usable date".
        null
    }
}
/**
 * Downloads one gzip-compressed log, unpacks it into [saveFolder] and passes the
 * unpacked file to [processFile]. All work runs on [Dispatchers.IO].
 *
 * When the unpacked file already exists and [force] is `false`, nothing is downloaded
 * and the existing file is processed as is. A failed download or a failed
 * decompression is logged and the function returns without processing.
 *
 * @param client HTTP client used for the download.
 * @param fileUrl absolute URL of the `.gz` archive.
 * @param filename archive file name; the unpacked file uses the same name without a trailing `.gz`.
 * @param serverId identifier forwarded to [processFile].
 * @param saveFolder directory that receives the files; created when missing.
 * @param force when `true`, download and unpack even if the unpacked file is already on disk.
 */
suspend fun fetchAndIndexFile(
    client: HttpClient,
    fileUrl: String,
    filename: String,
    serverId: Int,
    saveFolder: String = "mclogs",
    force: Boolean = false
) {
    withContext(Dispatchers.IO) {
        val folder = File(saveFolder)
        if (!folder.exists()) folder.mkdirs()

        val baseName = filename.removeSuffix(".gz")
        val logFile = File(folder, baseName)
        // Derived from baseName so the archive path can never be the same file as logFile,
        // even if filename arrives without a ".gz" suffix. Identical to filename otherwise.
        val gzFile = File(folder, "$baseName.gz")

        if (!force && logFile.exists()) {
            processFile(baseName, logFile, filename, serverId)
            return@withContext
        }

        val bytes = client.downloadGzWithRetries(fileUrl) ?: run {
            log.error("Failed to download gzip file $fileUrl")
            return@withContext
        }

        try {
            gzFile.writeBytes(bytes)
            val unpacked = try {
                gzFile.source().gzip().buffer().use { unzipped ->
                    logFile.sink().buffer().use { out -> unzipped.readAll(out) }
                }
                true
            } catch (ex: IOException) {
                log.error("Файл $fileUrl не является валидным gzip: ${ex.message}")
                false
            }
            if (!unpacked) {
                // A half-written log must not survive: the next non-forced run would
                // find it on disk and index it as if it were complete.
                logFile.delete()
                return@withContext
            }
        } finally {
            // The archive is only a temporary artifact; remove it on success and on failure.
            gzFile.delete()
        }

        processFile(baseName, logFile, filename, serverId)
    }
}
/**
 * Indexes an unpacked log file and records it in [ProcessedLogs].
 *
 * The log's date is taken from [baseName] via [parseFilenameDate]. When no date can
 * be parsed, the file is neither indexed nor recorded, and a warning is logged.
 *
 * @param baseName file name without the `.gz` suffix, used for date extraction.
 * @param logFile unpacked log file handed to `indexLogFile`.
 * @param filename original archive name stored in the `ProcessedLogs` row.
 * @param serverId server identifier passed to the indexer and stored in the row.
 */
private suspend fun processFile(baseName: String, logFile: File, filename: String, serverId: Int) {
    val date = parseFilenameDate(baseName) ?: run {
        log.warn("Skipping $filename (server $serverId): no date found in file name")
        return
    }

    indexLogFile(logFile, date, serverId)

    newSuspendedTransaction {
        ProcessedLogs.insert { row ->
            row[ProcessedLogs.filename] = filename
            row[ProcessedLogs.serverId] = serverId
            row[processedAt] = now().toLocalDateTime(TimeZone.currentSystemDefault())
        }
    }
}
/**
 * Fetches [url] and returns the response body only if it starts with the gzip
 * magic bytes (`0x1F 0x8B`), retrying on failure.
 *
 * An attempt counts as failed when the status is not successful, the body is not
 * gzip, or the request throws. Coroutine cancellation is never treated as a failed
 * attempt: it is rethrown immediately.
 *
 * @param url address to download.
 * @param maxAttempts total number of attempts; with a value of 0 or less no request is made.
 * @param delayBetweenAttemptsMs pause between consecutive attempts, in milliseconds.
 * @return the gzip bytes, or `null` when every attempt failed.
 */
suspend fun HttpClient.downloadGzWithRetries(
    url: String,
    maxAttempts: Int = 3,
    delayBetweenAttemptsMs: Long = 2000
): ByteArray? {
    repeat(maxAttempts) { attemptIndex ->
        val attempt = attemptIndex + 1
        try {
            val response = get(url)
            if (response.status.isSuccess()) {
                val bytes = response.bodyAsBytes()
                val startsWithGzipMagic =
                    bytes.size >= 2 && bytes[0] == 0x1F.toByte() && bytes[1] == 0x8B.toByte()
                if (startsWithGzipMagic) return bytes
                log.warn(
                    "Attempt #$attempt: Not GZIP bytes. " +
                        "First bytes=${bytes.take(2)} at $url"
                )
            } else {
                log.warn("Attempt #$attempt: Code ${response.status} for $url")
            }
        } catch (e: kotlinx.coroutines.CancellationException) {
            // Cancellation must propagate; swallowing it would keep a cancelled job retrying.
            throw e
        } catch (e: Exception) {
            log.warn("Attempt #$attempt failed: ${e.message}")
        }
        // No pause after the final attempt.
        if (attempt < maxAttempts) {
            delay(delayBetweenAttemptsMs)
        }
    }
    log.error("All $maxAttempts attempts to download '$url' have failed or not GZIP.")
    return null
}
/**
 * Refreshes the logs of every entry in `Servers`.
 *
 * For each server the listing page is fetched and every `<a>` whose `href` contains
 * `.log.gz` is collected. Links whose file is already recorded in `ProcessedLogs`
 * are dropped, and the rest are downloaded and indexed concurrently through
 * [fetchAndIndexFile]. A failure while handling one server is logged and the loop
 * continues with the next server; coroutine cancellation is rethrown instead.
 *
 * The file name is the last path segment of the link text, or of the resolved URL
 * when the link has no text. The same name is used for the already-processed check
 * and for the download.
 *
 * @param baseFolder root directory; each server gets a sub-folder named after its lowercase enum name.
 * @param force forwarded to [fetchAndIndexFile]; it does not bypass the already-processed check.
 */
suspend fun updateLogs(
    baseFolder: String = "mclogs",
    force: Boolean = false
) {
    val client = HttpClient(CIO) {
        install(Auth) {
            basic {
                credentials {
                    BasicAuthCredentials(USERNAME, PASSWORD)
                }
            }
        }
        install(HttpTimeout) {
            connectTimeoutMillis = 60_000
            requestTimeoutMillis = 60_000
            socketTimeoutMillis = 60_000
        }
    }
    client.use { c ->
        for (server in Servers.entries) {
            val serverId = server.id
            val serverName = server.name.lowercase()
            val serverFolder = "$baseFolder/$serverName"
            val serverUrl = "$BASE_URL/?serverid=$serverId"
            log.info("Обновление логов для сервера $serverName ($serverId)")
            try {
                val html = c.get(serverUrl).bodyAsText()
                val tasks = Jsoup.parse(html)
                    .select("a")
                    .filter { it.attr("href").contains(".log.gz") }
                    .map { link ->
                        // Resolve relative hrefs against the listing page URL.
                        val fullUrl = URL(URL(serverUrl), link.attr("href")).toString()
                        val text = link.text().trim()
                        val name = if (text.isNotEmpty()) File(text).name else File(fullUrl).name
                        fullUrl to name
                    }
                    // Checked against the final name, so links without text are deduplicated too.
                    .filterNot { (_, name) -> checkFileAlreadyProcessed(serverId, name) }
                log.info("Будет обработано файлов: ${tasks.size} для $serverName")
                coroutineScope {
                    tasks.map { (url, fname) ->
                        async {
                            fetchAndIndexFile(c, url, fname, serverId, serverFolder, force)
                        }
                    }.awaitAll()
                }
                log.info("Готово: скачаны и проиндексированы ${tasks.size} лог-файлов для $serverName.")
            } catch (e: kotlinx.coroutines.CancellationException) {
                // Do not treat cancellation as a per-server error; stop the whole update.
                throw e
            } catch (e: Exception) {
                log.error("Ошибка загрузки логов для сервера $serverName: ${e.message}", e)
            }
        }
    }
}
/**
 * Tells whether `ProcessedLogs` already holds a row for the given file and server.
 *
 * @param serverId server identifier to match.
 * @param fileName file name to match.
 * @return `true` when at least one matching row exists.
 */
suspend fun checkFileAlreadyProcessed(serverId: Int, fileName: String): Boolean =
    newSuspendedTransaction {
        ProcessedLogs
            .selectAll()
            .where {
                val sameFile = ProcessedLogs.filename eq fileName
                val sameServer = ProcessedLogs.serverId eq serverId
                sameFile and sameServer
            }
            .any()
    }
Add Comment
Please, Sign In to add comment