Compare commits

4 Commits

Author SHA1 Message Date
7860819029 Add CI/CD 2025-07-15 19:14:54 +02:00
304490b52e Correct YGO Fandom name transformation regex 2025-07-06 15:05:51 +02:00
ce5b87c34e Minor model adjustments 2025-07-01 12:54:56 +02:00
a9f6efc818 Minor config adjustment 2025-07-01 12:54:32 +02:00
11 changed files with 81 additions and 42 deletions

View File

@@ -0,0 +1,32 @@
# Manually triggered workflow (workflow_dispatch only): checks out the
# repository, installs a JDK and runs the Gradle build with the Quarkus
# container-image push flag enabled.
name: Create and Push Release

on:
  workflow_dispatch:

# NOTE(review): none of these four variables is referenced by any step in this
# workflow; confirm whether the build reads them from the environment or
# whether they can be dropped.
env:
  AUTHENTIK_URL: https://auth.smoothbrain.win
  REGISTRY_URL: gitea.smoothbrain.win
  IMAGE_OWNER: rak
  IMAGE_NAME: dex-scraper-java

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # NOTE(review): the action is referenced by the moving ref `main`;
      # consider pinning a tag or commit for reproducible builds.
      - name: Setup JDK
        uses: https://gitea.smoothbrain.win/rak/setup-java@main
        with:
          distribution: 'corretto'
          java-version: '21.0.6'
          cache: 'gradle'

      # Registry credentials are supplied through the environment from
      # repository secrets; presumably Quarkus maps them onto
      # quarkus.container-image.username/password - TODO confirm.
      - name: Build & Push Image
        env:
          QUARKUS_CONTAINER_IMAGE_USERNAME: ${{ secrets.CI_SERVICE_ACCOUNT }}
          QUARKUS_CONTAINER_IMAGE_PASSWORD: ${{ secrets.CI_SERVICE_ACCOUNT_PASSWORD }}
        run: |
          ./gradlew clean build \
            -Dquarkus.container-image.push=true

View File

@@ -22,6 +22,7 @@ dependencies {
implementation("io.quarkus:quarkus-rest-client-kotlin-serialization")
implementation("io.quarkus:quarkus-rest-jackson")
implementation("io.quarkus:quarkus-kotlin")
implementation("io.quarkus:quarkus-smallrye-fault-tolerance")
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
implementation("io.quarkus:quarkus-arc")
implementation("org.jsoup:jsoup:1.20.1")

View File

@@ -1,11 +1,15 @@
package com.rak.config.model
import io.smallrye.config.WithDefault
import io.smallrye.config.WithName
import java.util.*
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
@WithName("type")
fun getType(): String
@WithName("nullable")
@WithDefault("false")
fun isNullable(): Boolean
@WithName("root")
fun getRootConfig(): Optional<ExtractConfig>
@WithName("extractors")

View File

@@ -1,9 +1,7 @@
package com.rak.model.card
import com.rak.model.set.RegionalSet
data class CardPrint(
val id: String,
var id: Int,
val name: String,
val regionalName: String? = null,
val rarity: String
@@ -11,10 +9,17 @@ data class CardPrint(
companion object {
fun fromMap(map: Map<String, String>): CardPrint {
val regionalNameValue = map["regionalName"]
val regionalName = if (regionalNameValue == "") {
null
} else {
regionalNameValue
}
return CardPrint(
map["id"] ?: throw IllegalStateException("Parameter 'prefix' not found"),
map["id"]?.toInt() ?: throw IllegalStateException("Parameter 'prefix' not found"),
map["name"] ?: throw IllegalStateException("Parameter 'region' not found"),
map["regionalName"],
regionalName,
map["rarity"] ?: throw IllegalStateException("Parameter 'regionCode' not found"),
)
}

View File

@@ -22,28 +22,6 @@ data class RegionalSet(
)
}
/**
 * Builds one [RegionalSet] per index from three parallel lists.
 *
 * Element `i` of each list supplies, respectively, the `prefix`, `region` and
 * `regionCode` of the `i`-th set. Every created set receives an empty list as
 * its fourth constructor argument and `numberOfCards = -1`.
 *
 * @param idList values used as `prefix`
 * @param languageList values used as `region`
 * @param regionKeyAliasList values used as `regionCode`
 * @return a mutable set containing the created sets, in insertion order
 * @throws IllegalArgumentException if the three lists do not all have the same size
 */
fun flattenFromMemberLists(
    idList: List<String>,
    languageList: List<String>,
    regionKeyAliasList: List<String>,
): MutableSet<RegionalSet> {
    // All three sizes must agree. The previous `&&` guard only fired when BOTH
    // other lists differed from idList, so a single short list slipped through
    // and failed later with an IndexOutOfBoundsException.
    require(idList.size == languageList.size && idList.size == regionKeyAliasList.size) {
        "Lists have to be the same size"
    }

    return idList.indices.mapTo(mutableSetOf<RegionalSet>()) { index ->
        RegionalSet(
            prefix = idList[index],
            region = languageList[index],
            regionCode = regionKeyAliasList[index],
            listOf(),
            numberOfCards = -1
        )
    }
}
}
}

View File

@@ -11,7 +11,7 @@ class TransformationRegistry {
init {
register("trim") { it.trim() }
register("removeInnerQuotes") { it.replace("\"", "") }
register("removeInnerQuotes") { it.replace(Regex("^\""), "").replace(Regex("\"$"), "") }
register("replace") { input, parameters ->
require(parameters.size == 1 || parameters.size == 2) {
"'replace' requires either 1 or 2 parameters"

View File

@@ -46,7 +46,7 @@ class CommonCrawlService(
crawlName.indexName
))
} catch (ex: RuntimeException) {
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url")
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url", ex)
}
}

View File

@@ -54,7 +54,7 @@ class ScrapeService(
try {
document = Jsoup.connect(url).get()
} catch(ex: Exception) {
Log.warn("Error occurred during Jsoup query")
Log.warn("Error occurred during Jsoup query", ex)
throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'")
}
}

View File

@@ -6,14 +6,17 @@ import io.netty.buffer.ByteBufInputStream
import io.quarkus.rest.client.reactive.ClientQueryParam
import io.quarkus.rest.client.reactive.NotBody
import io.quarkus.rest.client.reactive.Url
import io.smallrye.faulttolerance.api.RateLimit
import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET
import jakarta.ws.rs.Path
import jakarta.ws.rs.PathParam
import jakarta.ws.rs.QueryParam
import org.eclipse.microprofile.faulttolerance.Bulkhead
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
import java.time.temporal.ChronoUnit
@RegisterRestClient(baseUri = "whatever")
@RegisterProvider(NDJsonReader::class)
@@ -23,6 +26,11 @@ interface CommonCrawlRestClient {
@ClientQueryParam(name = "output", value = ["json"])
@Path("/{index}-index")
@Consumes("text/x-ndjson")
@RateLimit(
value = 1,
minSpacing = 5
)
@Bulkhead
fun queryIndex(
@Url
baseUrl: String,

View File

@@ -130,7 +130,11 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
val extractedText = extractTextFromElementByTargetFieldConfig(
rootElement,
fieldConfig
) ?: throw ElementNotFoundException("Could not find element for '$identifier'")
) ?: if (fieldConfig.isNullable()) {
""
} else {
throw ElementNotFoundException("Could not find element for '$identifier'")
}
val mapToModify: MutableMap<String, String> = try {
resultList[index]
@@ -224,14 +228,8 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
} catch (ex: RuntimeException) {
when (ex) {
is ElementNotFoundException,
is IllegalStateException -> {
// if (extractionConfig.getFallbackConfiguration().isPresent) {
// intermediateResult = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
// } else {
// throw ex
// }
}
is ValueValidationException -> Log.warn(ex.message)
is IllegalStateException,
is ValueValidationException -> Log.debug(ex.message)
else -> throw ex
}
}

View File

@@ -1,6 +1,15 @@
quarkus:
container-image:
registry: gitea.smoothbrain.win
group: rak
build: true
additional-tags: latest
http:
port: 8081
live-reload:
instrumentation: true
scraper:
sources:
@@ -96,6 +105,11 @@ scraper:
" .+",
""
]
- name: "regexReplace"
parameters: [
".+-[A-Za-z]*0?",
""
]
validation:
pattern: "^.+-.+\\\\d.+$"
name:
@@ -109,7 +123,7 @@ scraper:
transform:
- name: "regexReplace"
parameters: [
"\\(.+\\)",
" ?\\(.+\\)",
""
]
- name: "removeInnerQuotes"
@@ -117,9 +131,8 @@ scraper:
validation:
pattern: "^\".+\".*"
regional-name:
fallback:
default: "N/A"
type: int
nullable: true
extractors:
- steps:
- type: xpath