Compare commits
4 Commits
5930da7a4c
...
7860819029
| Author | SHA1 | Date | |
|---|---|---|---|
| 7860819029 | |||
| 304490b52e | |||
| ce5b87c34e | |||
| a9f6efc818 |
32
.gitea/workflows/release.yml
Normal file
32
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
name: Create and Push Release
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
env:
|
||||||
|
AUTHENTIK_URL: https://auth.smoothbrain.win
|
||||||
|
REGISTRY_URL: gitea.smoothbrain.win
|
||||||
|
IMAGE_OWNER: rak
|
||||||
|
IMAGE_NAME: dex-scraper-java
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
release:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup JDK
|
||||||
|
uses: https://gitea.smoothbrain.win/rak/setup-java@main
|
||||||
|
with:
|
||||||
|
distribution: 'corretto'
|
||||||
|
java-version: '21.0.6'
|
||||||
|
cache: 'gradle'
|
||||||
|
|
||||||
|
- name: Build & Push Image
|
||||||
|
env:
|
||||||
|
QUARKUS_CONTAINER_IMAGE_USERNAME: ${{ secrets.CI_SERVICE_ACCOUNT }}
|
||||||
|
QUARKUS_CONTAINER_IMAGE_PASSWORD: ${{ secrets.CI_SERVICE_ACCOUNT_PASSWORD }}
|
||||||
|
run: |
|
||||||
|
./gradlew clean build \
|
||||||
|
-Dquarkus.container-image.push=true
|
||||||
@@ -22,6 +22,7 @@ dependencies {
|
|||||||
implementation("io.quarkus:quarkus-rest-client-kotlin-serialization")
|
implementation("io.quarkus:quarkus-rest-client-kotlin-serialization")
|
||||||
implementation("io.quarkus:quarkus-rest-jackson")
|
implementation("io.quarkus:quarkus-rest-jackson")
|
||||||
implementation("io.quarkus:quarkus-kotlin")
|
implementation("io.quarkus:quarkus-kotlin")
|
||||||
|
implementation("io.quarkus:quarkus-smallrye-fault-tolerance")
|
||||||
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
|
implementation("org.jetbrains.kotlin:kotlin-stdlib-jdk8")
|
||||||
implementation("io.quarkus:quarkus-arc")
|
implementation("io.quarkus:quarkus-arc")
|
||||||
implementation("org.jsoup:jsoup:1.20.1")
|
implementation("org.jsoup:jsoup:1.20.1")
|
||||||
|
|||||||
@@ -1,11 +1,15 @@
|
|||||||
package com.rak.config.model
|
package com.rak.config.model
|
||||||
|
|
||||||
|
import io.smallrye.config.WithDefault
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
import java.util.*
|
import java.util.*
|
||||||
|
|
||||||
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
|
interface ScrapeTargetFieldConfig : AbstractScrapeTargetFieldConfig {
|
||||||
@WithName("type")
|
@WithName("type")
|
||||||
fun getType(): String
|
fun getType(): String
|
||||||
|
@WithName("nullable")
|
||||||
|
@WithDefault("false")
|
||||||
|
fun isNullable(): Boolean
|
||||||
@WithName("root")
|
@WithName("root")
|
||||||
fun getRootConfig(): Optional<ExtractConfig>
|
fun getRootConfig(): Optional<ExtractConfig>
|
||||||
@WithName("extractors")
|
@WithName("extractors")
|
||||||
|
|||||||
@@ -1,9 +1,7 @@
|
|||||||
package com.rak.model.card
|
package com.rak.model.card
|
||||||
|
|
||||||
import com.rak.model.set.RegionalSet
|
|
||||||
|
|
||||||
data class CardPrint(
|
data class CardPrint(
|
||||||
val id: String,
|
var id: Int,
|
||||||
val name: String,
|
val name: String,
|
||||||
val regionalName: String? = null,
|
val regionalName: String? = null,
|
||||||
val rarity: String
|
val rarity: String
|
||||||
@@ -11,10 +9,17 @@ data class CardPrint(
|
|||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
fun fromMap(map: Map<String, String>): CardPrint {
|
fun fromMap(map: Map<String, String>): CardPrint {
|
||||||
|
val regionalNameValue = map["regionalName"]
|
||||||
|
val regionalName = if (regionalNameValue == "") {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
regionalNameValue
|
||||||
|
}
|
||||||
|
|
||||||
return CardPrint(
|
return CardPrint(
|
||||||
map["id"] ?: throw IllegalStateException("Parameter 'prefix' not found"),
|
map["id"]?.toInt() ?: throw IllegalStateException("Parameter 'prefix' not found"),
|
||||||
map["name"] ?: throw IllegalStateException("Parameter 'region' not found"),
|
map["name"] ?: throw IllegalStateException("Parameter 'region' not found"),
|
||||||
map["regionalName"],
|
regionalName,
|
||||||
map["rarity"] ?: throw IllegalStateException("Parameter 'regionCode' not found"),
|
map["rarity"] ?: throw IllegalStateException("Parameter 'regionCode' not found"),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,28 +22,6 @@ data class RegionalSet(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun flattenFromMemberLists(
|
|
||||||
idList: List<String>,
|
|
||||||
languageList: List<String>,
|
|
||||||
regionKeyAliasList: List<String>,
|
|
||||||
): MutableSet<RegionalSet> {
|
|
||||||
if (idList.size != languageList.size && idList.size != regionKeyAliasList.size) {
|
|
||||||
throw IllegalArgumentException("Lists have to be the same size")
|
|
||||||
}
|
|
||||||
|
|
||||||
val regionalSetList: MutableSet<RegionalSet> = mutableSetOf()
|
|
||||||
for (index in 0..idList.size - 1) {
|
|
||||||
regionalSetList.add(RegionalSet(
|
|
||||||
prefix = idList[index],
|
|
||||||
region = languageList[index],
|
|
||||||
regionCode = regionKeyAliasList[index],
|
|
||||||
listOf(),
|
|
||||||
numberOfCards = -1
|
|
||||||
))
|
|
||||||
}
|
|
||||||
return regionalSetList
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -11,7 +11,7 @@ class TransformationRegistry {
|
|||||||
|
|
||||||
init {
|
init {
|
||||||
register("trim") { it.trim() }
|
register("trim") { it.trim() }
|
||||||
register("removeInnerQuotes") { it.replace("\"", "") }
|
register("removeInnerQuotes") { it.replace(Regex("^\""), "").replace(Regex("\"$"), "") }
|
||||||
register("replace") { input, parameters ->
|
register("replace") { input, parameters ->
|
||||||
require(parameters.size == 1 || parameters.size == 2) {
|
require(parameters.size == 1 || parameters.size == 2) {
|
||||||
"'replace' requires either 1 or 2 parameters"
|
"'replace' requires either 1 or 2 parameters"
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ class CommonCrawlService(
|
|||||||
crawlName.indexName
|
crawlName.indexName
|
||||||
))
|
))
|
||||||
} catch (ex: RuntimeException) {
|
} catch (ex: RuntimeException) {
|
||||||
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url")
|
Log.warn("Error occurred querying crawl '${crawlName.indexName}' for URL $url", ex)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ class ScrapeService(
|
|||||||
try {
|
try {
|
||||||
document = Jsoup.connect(url).get()
|
document = Jsoup.connect(url).get()
|
||||||
} catch(ex: Exception) {
|
} catch(ex: Exception) {
|
||||||
Log.warn("Error occurred during Jsoup query")
|
Log.warn("Error occurred during Jsoup query", ex)
|
||||||
throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'")
|
throw TargetNotFoundException("Could not find '$setName' for Provider '$provider'")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,14 +6,17 @@ import io.netty.buffer.ByteBufInputStream
|
|||||||
import io.quarkus.rest.client.reactive.ClientQueryParam
|
import io.quarkus.rest.client.reactive.ClientQueryParam
|
||||||
import io.quarkus.rest.client.reactive.NotBody
|
import io.quarkus.rest.client.reactive.NotBody
|
||||||
import io.quarkus.rest.client.reactive.Url
|
import io.quarkus.rest.client.reactive.Url
|
||||||
|
import io.smallrye.faulttolerance.api.RateLimit
|
||||||
import jakarta.ws.rs.Consumes
|
import jakarta.ws.rs.Consumes
|
||||||
import jakarta.ws.rs.GET
|
import jakarta.ws.rs.GET
|
||||||
import jakarta.ws.rs.Path
|
import jakarta.ws.rs.Path
|
||||||
import jakarta.ws.rs.PathParam
|
import jakarta.ws.rs.PathParam
|
||||||
import jakarta.ws.rs.QueryParam
|
import jakarta.ws.rs.QueryParam
|
||||||
|
import org.eclipse.microprofile.faulttolerance.Bulkhead
|
||||||
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam
|
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam
|
||||||
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider
|
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider
|
||||||
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
||||||
|
import java.time.temporal.ChronoUnit
|
||||||
|
|
||||||
@RegisterRestClient(baseUri = "whatever")
|
@RegisterRestClient(baseUri = "whatever")
|
||||||
@RegisterProvider(NDJsonReader::class)
|
@RegisterProvider(NDJsonReader::class)
|
||||||
@@ -23,6 +26,11 @@ interface CommonCrawlRestClient {
|
|||||||
@ClientQueryParam(name = "output", value = ["json"])
|
@ClientQueryParam(name = "output", value = ["json"])
|
||||||
@Path("/{index}-index")
|
@Path("/{index}-index")
|
||||||
@Consumes("text/x-ndjson")
|
@Consumes("text/x-ndjson")
|
||||||
|
@RateLimit(
|
||||||
|
value = 1,
|
||||||
|
minSpacing = 5
|
||||||
|
)
|
||||||
|
@Bulkhead
|
||||||
fun queryIndex(
|
fun queryIndex(
|
||||||
@Url
|
@Url
|
||||||
baseUrl: String,
|
baseUrl: String,
|
||||||
|
|||||||
@@ -130,7 +130,11 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
|||||||
val extractedText = extractTextFromElementByTargetFieldConfig(
|
val extractedText = extractTextFromElementByTargetFieldConfig(
|
||||||
rootElement,
|
rootElement,
|
||||||
fieldConfig
|
fieldConfig
|
||||||
) ?: throw ElementNotFoundException("Could not find element for '$identifier'")
|
) ?: if (fieldConfig.isNullable()) {
|
||||||
|
""
|
||||||
|
} else {
|
||||||
|
throw ElementNotFoundException("Could not find element for '$identifier'")
|
||||||
|
}
|
||||||
|
|
||||||
val mapToModify: MutableMap<String, String> = try {
|
val mapToModify: MutableMap<String, String> = try {
|
||||||
resultList[index]
|
resultList[index]
|
||||||
@@ -224,14 +228,8 @@ abstract class AbstractExtractionService<E, T : AbstractScrapeTargetConfig> {
|
|||||||
} catch (ex: RuntimeException) {
|
} catch (ex: RuntimeException) {
|
||||||
when (ex) {
|
when (ex) {
|
||||||
is ElementNotFoundException,
|
is ElementNotFoundException,
|
||||||
is IllegalStateException -> {
|
is IllegalStateException,
|
||||||
// if (extractionConfig.getFallbackConfiguration().isPresent) {
|
is ValueValidationException -> Log.debug(ex.message)
|
||||||
// intermediateResult = extractionConfig.getFallbackConfiguration().get().getOptionalDefaultValue()
|
|
||||||
// } else {
|
|
||||||
// throw ex
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
is ValueValidationException -> Log.warn(ex.message)
|
|
||||||
else -> throw ex
|
else -> throw ex
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,15 @@
|
|||||||
quarkus:
|
quarkus:
|
||||||
|
|
||||||
|
container-image:
|
||||||
|
registry: gitea.smoothbrain.win
|
||||||
|
group: rak
|
||||||
|
build: true
|
||||||
|
additional-tags: latest
|
||||||
|
|
||||||
http:
|
http:
|
||||||
port: 8081
|
port: 8081
|
||||||
|
live-reload:
|
||||||
|
instrumentation: true
|
||||||
|
|
||||||
scraper:
|
scraper:
|
||||||
sources:
|
sources:
|
||||||
@@ -96,6 +105,11 @@ scraper:
|
|||||||
" .+",
|
" .+",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
|
- name: "regexReplace"
|
||||||
|
parameters: [
|
||||||
|
".+-[A-Za-z]*0?",
|
||||||
|
""
|
||||||
|
]
|
||||||
validation:
|
validation:
|
||||||
pattern: "^.+-.+\\\\d.+$"
|
pattern: "^.+-.+\\\\d.+$"
|
||||||
name:
|
name:
|
||||||
@@ -109,7 +123,7 @@ scraper:
|
|||||||
transform:
|
transform:
|
||||||
- name: "regexReplace"
|
- name: "regexReplace"
|
||||||
parameters: [
|
parameters: [
|
||||||
"\\(.+\\)",
|
" ?\\(.+\\)",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
- name: "removeInnerQuotes"
|
- name: "removeInnerQuotes"
|
||||||
@@ -117,9 +131,8 @@ scraper:
|
|||||||
validation:
|
validation:
|
||||||
pattern: "^\".+\".*"
|
pattern: "^\".+\".*"
|
||||||
regional-name:
|
regional-name:
|
||||||
fallback:
|
|
||||||
default: "N/A"
|
|
||||||
type: int
|
type: int
|
||||||
|
nullable: true
|
||||||
extractors:
|
extractors:
|
||||||
- steps:
|
- steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
|
|||||||
Reference in New Issue
Block a user